chore(lsm): add initial range search
This commit is contained in:
		
							parent
							
								
									ddd21b39a6
								
							
						
					
					
						commit
						59eaef2ae3
					
				@ -44,7 +44,14 @@ func TestReverse(t *testing.T) {
 | 
				
			|||||||
func TestFile(t *testing.T) {
 | 
					func TestFile(t *testing.T) {
 | 
				
			||||||
	is := is.New(t)
 | 
						is := is.New(t)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	f := basicFile(t)
 | 
						entries := entries {
 | 
				
			||||||
 | 
							{"key-1", 1},
 | 
				
			||||||
 | 
							{"key-2", 2},
 | 
				
			||||||
 | 
							{"key-3", 3},
 | 
				
			||||||
 | 
							{"longerkey-4", 65535},
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						f := basicFile(t, entries, entries, entries)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	sf, err := ReadFile(f)
 | 
						sf, err := ReadFile(f)
 | 
				
			||||||
	is.NoErr(err)
 | 
						is.NoErr(err)
 | 
				
			||||||
@ -52,20 +59,18 @@ func TestFile(t *testing.T) {
 | 
				
			|||||||
	is.Equal(len(sf.segments), 3)
 | 
						is.Equal(len(sf.segments), 3)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func basicFile(t *testing.T) fs.File {
 | 
					func basicFile(t *testing.T, lis ...entries) fs.File {
 | 
				
			||||||
	t.Helper()
 | 
						t.Helper()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	data := segment{entries: entries{
 | 
						segments := make([][]byte, len(lis))
 | 
				
			||||||
		{"key-1", 1},
 | 
						var err error
 | 
				
			||||||
		{"key-2", 2},
 | 
						for i, entries := range lis {
 | 
				
			||||||
		{"key-3", 3},
 | 
							data := segment{entries: entries}
 | 
				
			||||||
		{"longerkey-4", 65535},
 | 
							segments[i], err = data.MarshalBinary()
 | 
				
			||||||
	}}
 | 
							if err != nil {
 | 
				
			||||||
 | 
								t.Error(err)
 | 
				
			||||||
	b, err := data.MarshalBinary()
 | 
							}
 | 
				
			||||||
	if err != nil {
 | 
					 | 
				
			||||||
		t.Error(err)
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return NewFile(b, b, b)
 | 
						return NewFile(segments...)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										69
									
								
								lsm/sst.go
									
									
									
									
									
								
							
							
						
						
									
										69
									
								
								lsm/sst.go
									
									
									
									
									
								
							@ -1,5 +1,12 @@
 | 
				
			|||||||
// SPDX-FileCopyrightText: 2023 Jon Lundy <jon@xuu.cc>
 | 
					// SPDX-FileCopyrightText: 2023 Jon Lundy <jon@xuu.cc>
 | 
				
			||||||
// SPDX-License-Identifier: BSD-3-Clause
 | 
					// SPDX-License-Identifier: BSD-3-Clause
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// lsm -- Log Structured Merge-Tree
 | 
				
			||||||
 | 
					//
 | 
				
			||||||
 | 
					// This is a basic LSM tree using an SSTable optimized for append-only writing. On disk, data is organized into time-ordered
 | 
				
			||||||
 | 
					// files of segments, containing reverse-sorted keys. Each segment ends with a magic value `Souris\x01`, a 4-byte hash, a count of
 | 
				
			||||||
 | 
					// segment entries, and data length.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
package lsm
 | 
					package lsm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
@ -23,11 +30,11 @@ var (
 | 
				
			|||||||
)
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type header struct {
 | 
					type header struct {
 | 
				
			||||||
	sig     []byte
 | 
						sig     []byte // 4Byte signature
 | 
				
			||||||
	entries uint64
 | 
						entries uint64 // count of entries in segment
 | 
				
			||||||
	datalen uint64
 | 
						datalen uint64 // length of data
 | 
				
			||||||
	headlen uint64
 | 
						headlen uint64 // length of header
 | 
				
			||||||
	end     int64
 | 
						end     int64  // location of end of data/start of header (start of data is `end - datalen`)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// ReadHead parses the header from a segment. It reads from the end of a slice of length segmentFooterLength.
 | 
					// ReadHead parses the header from a segment. It reads from the end of a slice of length segmentFooterLength.
 | 
				
			||||||
@ -173,8 +180,21 @@ func (s *segmentReader) FirstEntry() (*entryBytes, error) {
 | 
				
			|||||||
	return e, err
 | 
						return e, err
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					func (s *segmentReader) VerifyHash() (bool, error) {
 | 
				
			||||||
 | 
						h := hash()
 | 
				
			||||||
 | 
						data := make([]byte, s.head.datalen)
 | 
				
			||||||
 | 
						_, err := s.rd.ReadAt(data, s.head.end-int64(s.head.datalen))
 | 
				
			||||||
 | 
						if err != nil {
 | 
				
			||||||
 | 
							return false, err
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						_, err = h.Write(data)
 | 
				
			||||||
 | 
						ok := bytes.Equal(h.Sum(nil), s.head.sig)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return ok, err
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
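// Find locates needle within a segment. If it can't find it, it returns the nearest key before needle.
 | 
					// Find locates needle within a segment. If it can't find it, it returns the nearest key before needle.
					// When first is true it returns as soon as a key equal to needle is seen; otherwise it keeps scanning past equal keys.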
 | 
				
			||||||
func (s *segmentReader) Find(needle []byte) (*entryBytes, bool, error) {
 | 
					func (s *segmentReader) Find(needle []byte, first bool) (*entryBytes, bool, error) {
 | 
				
			||||||
	if s == nil {
 | 
						if s == nil {
 | 
				
			||||||
		return nil, false, nil
 | 
							return nil, false, nil
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
@ -184,23 +204,27 @@ func (s *segmentReader) Find(needle []byte) (*entryBytes, bool, error) {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	last := e
 | 
						last := e
 | 
				
			||||||
 | 
						found := false
 | 
				
			||||||
	for pos > 0 {
 | 
						for pos > 0 {
 | 
				
			||||||
		key, _ := e.KeyValue()
 | 
							key, _ := e.KeyValue()
 | 
				
			||||||
		switch bytes.Compare(key, needle) {
 | 
							switch bytes.Compare(key, needle) {
 | 
				
			||||||
 | 
							case 1: // key=ccc, needle=bbb
 | 
				
			||||||
 | 
								return last, found, nil
 | 
				
			||||||
		case 0: // equal
 | 
							case 0: // equal
 | 
				
			||||||
			return e, true, nil
 | 
								if first {
 | 
				
			||||||
 | 
									return e, true, nil
 | 
				
			||||||
 | 
								}
 | 
				
			||||||
 | 
								found = true
 | 
				
			||||||
 | 
								fallthrough
 | 
				
			||||||
		case -1: // key=aaa, needle=bbb
 | 
							case -1: // key=aaa, needle=bbb
 | 
				
			||||||
			last = e
 | 
								last = e
 | 
				
			||||||
			e, pos, err = s.readEntryAt(pos)
 | 
								e, pos, err = s.readEntryAt(pos)
 | 
				
			||||||
			if err != nil {
 | 
								if err != nil {
 | 
				
			||||||
				return nil, false, err
 | 
									return nil, found, err
 | 
				
			||||||
			}
 | 
								}
 | 
				
			||||||
 | 
					 | 
				
			||||||
		case 1: // key=ccc, needle=bbb
 | 
					 | 
				
			||||||
			return last, false, nil
 | 
					 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	return last, false, nil
 | 
						return last, found, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
func (s *segmentReader) readEntryAt(pos int64) (*entryBytes, int64, error) {
 | 
					func (s *segmentReader) readEntryAt(pos int64) (*entryBytes, int64, error) {
 | 
				
			||||||
	if pos < 0 {
 | 
						if pos < 0 {
 | 
				
			||||||
@ -217,7 +241,10 @@ func (s *segmentReader) readEntryAt(pos int64) (*entryBytes, int64, error) {
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
type logFile struct {
 | 
					type logFile struct {
 | 
				
			||||||
	rd       interface{io.ReaderAt; io.WriterTo}
 | 
						rd interface {
 | 
				
			||||||
 | 
							io.ReaderAt
 | 
				
			||||||
 | 
							io.WriterTo
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
	segments []segmentReader
 | 
						segments []segmentReader
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	fs.File
 | 
						fs.File
 | 
				
			||||||
@ -232,7 +259,10 @@ func ReadFile(fd fs.File) (*logFile, error) {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	eof := stat.Size()
 | 
						eof := stat.Size()
 | 
				
			||||||
	if rd, ok := fd.(interface{io.ReaderAt; io.WriterTo}); ok {
 | 
						if rd, ok := fd.(interface {
 | 
				
			||||||
 | 
							io.ReaderAt
 | 
				
			||||||
 | 
							io.WriterTo
 | 
				
			||||||
 | 
						}); ok {
 | 
				
			||||||
		l.rd = rd
 | 
							l.rd = rd
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	} else {
 | 
						} else {
 | 
				
			||||||
@ -243,8 +273,8 @@ func ReadFile(fd fs.File) (*logFile, error) {
 | 
				
			|||||||
		l.rd = bytes.NewReader(rd)
 | 
							l.rd = bytes.NewReader(rd)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						head := make([]byte, segmentFooterLength)
 | 
				
			||||||
	for eof > 0 {
 | 
						for eof > 0 {
 | 
				
			||||||
		head := make([]byte, segmentFooterLength)
 | 
					 | 
				
			||||||
		_, err = l.rd.ReadAt(head, eof-int64(segmentFooterLength))
 | 
							_, err = l.rd.ReadAt(head, eof-int64(segmentFooterLength))
 | 
				
			||||||
		if err != nil {
 | 
							if err != nil {
 | 
				
			||||||
			return nil, err
 | 
								return nil, err
 | 
				
			||||||
@ -285,7 +315,7 @@ func (l *logFile) LoadSegment(pos int64) (*segmentBytes, error) {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	return &segmentBytes{b, -1}, nil
 | 
						return &segmentBytes{b, -1}, nil
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
func (l *logFile) Find(needle []byte) (*entryBytes, bool, error) {
 | 
					func (l *logFile) Find(needle []byte, first bool) (*entryBytes, bool, error) {
 | 
				
			||||||
	var last segmentReader
 | 
						var last segmentReader
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	for _, s := range l.segments {
 | 
						for _, s := range l.segments {
 | 
				
			||||||
@ -294,13 +324,16 @@ func (l *logFile) Find(needle []byte) (*entryBytes, bool, error) {
 | 
				
			|||||||
			return nil, false, err
 | 
								return nil, false, err
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		k, _ := e.KeyValue()
 | 
							k, _ := e.KeyValue()
 | 
				
			||||||
		if bytes.Compare(k, needle) > 0 {
 | 
							if first && bytes.Compare(k, needle) >= 0 {
 | 
				
			||||||
 | 
								break
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							if !first && bytes.Compare(k, needle) > 0 {
 | 
				
			||||||
			break
 | 
								break
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		last = s
 | 
							last = s
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return last.Find(needle)
 | 
						return last.Find(needle, first)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
func (l *logFile) WriteTo(w io.Writer) (int64, error) {
 | 
					func (l *logFile) WriteTo(w io.Writer) (int64, error) {
 | 
				
			||||||
	return l.rd.WriteTo(w)
 | 
						return l.rd.WriteTo(w)
 | 
				
			||||||
 | 
				
			|||||||
@ -1,5 +1,6 @@
 | 
				
			|||||||
// SPDX-FileCopyrightText: 2023 Jon Lundy <jon@xuu.cc>
 | 
					// SPDX-FileCopyrightText: 2023 Jon Lundy <jon@xuu.cc>
 | 
				
			||||||
// SPDX-License-Identifier: BSD-3-Clause
 | 
					// SPDX-License-Identifier: BSD-3-Clause
 | 
				
			||||||
 | 
					
 | 
				
			||||||
package lsm
 | 
					package lsm
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import (
 | 
					import (
 | 
				
			||||||
@ -39,19 +40,19 @@ func TestLargeFile(t *testing.T) {
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
	t.Log(f.Stat())
 | 
						t.Log(f.Stat())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tt, ok, err := sf.Find(needle)
 | 
						tt, ok, err := sf.Find(needle, false)
 | 
				
			||||||
	is.NoErr(err)
 | 
						is.NoErr(err)
 | 
				
			||||||
	is.True(ok)
 | 
						is.True(ok)
 | 
				
			||||||
	key, val := tt.KeyValue()
 | 
						key, val := tt.KeyValue()
 | 
				
			||||||
	t.Log(string(key), val)
 | 
						t.Log(string(key), val)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tt, ok, err = sf.Find([]byte("needle"))
 | 
						tt, ok, err = sf.Find([]byte("needle"), false)
 | 
				
			||||||
	is.NoErr(err)
 | 
						is.NoErr(err)
 | 
				
			||||||
	is.True(!ok)
 | 
						is.True(!ok)
 | 
				
			||||||
	key, val = tt.KeyValue()
 | 
						key, val = tt.KeyValue()
 | 
				
			||||||
	t.Log(string(key), val)
 | 
						t.Log(string(key), val)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tt, ok, err = sf.Find([]byte{'\xff'})
 | 
						tt, ok, err = sf.Find([]byte{'\xff'}, false)
 | 
				
			||||||
	is.NoErr(err)
 | 
						is.NoErr(err)
 | 
				
			||||||
	is.True(!ok)
 | 
						is.True(!ok)
 | 
				
			||||||
	key, val = tt.KeyValue()
 | 
						key, val = tt.KeyValue()
 | 
				
			||||||
@ -85,23 +86,28 @@ func TestLargeFileDisk(t *testing.T) {
 | 
				
			|||||||
		is.NoErr(err)
 | 
							is.NoErr(err)
 | 
				
			||||||
		k, v := e.KeyValue()
 | 
							k, v := e.KeyValue()
 | 
				
			||||||
		needle = k
 | 
							needle = k
 | 
				
			||||||
		t.Logf("Segment-%d: %s = %d", i, k, v)
 | 
					
 | 
				
			||||||
 | 
							ok, err := s.VerifyHash()
 | 
				
			||||||
 | 
							is.NoErr(err)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							t.Logf("Segment-%d: %s = %d %t", i, k, v, ok)
 | 
				
			||||||
 | 
							is.True(ok)
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	t.Log(f.Stat())
 | 
						t.Log(f.Stat())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tt, ok, err := sf.Find(needle)
 | 
						tt, ok, err := sf.Find(needle, false)
 | 
				
			||||||
	is.NoErr(err)
 | 
						is.NoErr(err)
 | 
				
			||||||
	is.True(ok)
 | 
						is.True(ok)
 | 
				
			||||||
	key, val := tt.KeyValue()
 | 
						key, val := tt.KeyValue()
 | 
				
			||||||
	t.Log(string(key), val)
 | 
						t.Log(string(key), val)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tt, ok, err = sf.Find([]byte("needle"))
 | 
						tt, ok, err = sf.Find([]byte("needle"), false)
 | 
				
			||||||
	is.NoErr(err)
 | 
						is.NoErr(err)
 | 
				
			||||||
	is.True(!ok)
 | 
						is.True(!ok)
 | 
				
			||||||
	key, val = tt.KeyValue()
 | 
						key, val = tt.KeyValue()
 | 
				
			||||||
	t.Log(string(key), val)
 | 
						t.Log(string(key), val)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	tt, ok, err = sf.Find([]byte{'\xff'})
 | 
						tt, ok, err = sf.Find([]byte{'\xff'}, false)
 | 
				
			||||||
	is.NoErr(err)
 | 
						is.NoErr(err)
 | 
				
			||||||
	is.True(!ok)
 | 
						is.True(!ok)
 | 
				
			||||||
	key, val = tt.KeyValue()
 | 
						key, val = tt.KeyValue()
 | 
				
			||||||
@ -133,7 +139,7 @@ func BenchmarkLargeFile(b *testing.B) {
 | 
				
			|||||||
		if each > 0 && n%each == 0 {
 | 
							if each > 0 && n%each == 0 {
 | 
				
			||||||
			b.Log(n)
 | 
								b.Log(n)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
		_, ok, err := sf.Find(keys[n])
 | 
							_, ok, err := sf.Find(keys[n], false)
 | 
				
			||||||
		if err != nil {
 | 
							if err != nil {
 | 
				
			||||||
			b.Error(err)
 | 
								b.Error(err)
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
@ -144,40 +150,48 @@ func BenchmarkLargeFile(b *testing.B) {
 | 
				
			|||||||
	b.Log("okays=", b.N, okays)
 | 
						b.Log("okays=", b.N, okays)
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func BenchmarkLargeFileB(b *testing.B) {
 | 
					// TestFindRange exercises an initial range search: it finds the start and stop entries for a range of needles.
 | 
				
			||||||
	segCount := 4098 / 16
 | 
					// TODO: start the second query from where the first left off. Use an iterator?
 | 
				
			||||||
	f := randFile(b, 2_000_000, segCount)
 | 
					func TestFindRange(t *testing.T) {
 | 
				
			||||||
 | 
						is := is.New(t)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						f := basicFile(t, 
 | 
				
			||||||
 | 
							entries{
 | 
				
			||||||
 | 
								{"AD", 5},
 | 
				
			||||||
 | 
								{"AC", 5},
 | 
				
			||||||
 | 
								{"AB", 4},
 | 
				
			||||||
 | 
								{"AB", 3},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
							entries{
 | 
				
			||||||
 | 
								{"AB", 2},
 | 
				
			||||||
 | 
								{"AA", 1},
 | 
				
			||||||
 | 
							},
 | 
				
			||||||
 | 
						)
 | 
				
			||||||
	sf, err := ReadFile(f)
 | 
						sf, err := ReadFile(f)
 | 
				
			||||||
	if err != nil {
 | 
						is.NoErr(err)
 | 
				
			||||||
		b.Error(err)
 | 
					
 | 
				
			||||||
	}
 | 
						var ok bool
 | 
				
			||||||
	key := make([]byte, 5)
 | 
						var first, last  *entryBytes
 | 
				
			||||||
	keys := make([][]byte, b.N)
 | 
					
 | 
				
			||||||
	for i := range keys {
 | 
						first, ok, err = sf.Find([]byte("AB"), true)
 | 
				
			||||||
		_, err = crand.Read(key)
 | 
						is.NoErr(err)
 | 
				
			||||||
		if err != nil {
 | 
					
 | 
				
			||||||
			b.Error(err)
 | 
						key, val := first.KeyValue()
 | 
				
			||||||
		}
 | 
						t.Log(string(key), val)
 | 
				
			||||||
		keys[i] = []byte(base64.RawURLEncoding.EncodeToString(key))
 | 
					
 | 
				
			||||||
	}
 | 
						is.True(ok)
 | 
				
			||||||
	b.Log("ready", b.N)
 | 
						is.Equal(key, []byte("AB"))
 | 
				
			||||||
	b.ResetTimer()
 | 
						is.Equal(val, uint64(2))
 | 
				
			||||||
	okays := 0
 | 
					
 | 
				
			||||||
	each := b.N / 10
 | 
						last, ok, err = sf.Find([]byte("AC"), false)
 | 
				
			||||||
	for n := 0; n < b.N; n++ {
 | 
						is.NoErr(err)
 | 
				
			||||||
		if each > 0 && n%each == 0 {
 | 
					
 | 
				
			||||||
			b.Log(n)
 | 
						key, val = last.KeyValue()
 | 
				
			||||||
		}
 | 
						t.Log(string(key), val)
 | 
				
			||||||
		_, ok, err := sf.Find(keys[n])
 | 
					
 | 
				
			||||||
		if err != nil {
 | 
						is.True(ok)
 | 
				
			||||||
			b.Error(err)
 | 
						is.Equal(key, []byte("AC"))
 | 
				
			||||||
		}
 | 
						is.Equal(val, uint64(5))
 | 
				
			||||||
		if ok {
 | 
					 | 
				
			||||||
			okays++
 | 
					 | 
				
			||||||
		}
 | 
					 | 
				
			||||||
	}
 | 
					 | 
				
			||||||
	b.Log("okays=", b.N, okays)
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
func randFile(t interface {
 | 
					func randFile(t interface {
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user