chore(lsm): add initial range search
This commit is contained in:
parent
ddd21b39a6
commit
59eaef2ae3
|
@ -44,7 +44,14 @@ func TestReverse(t *testing.T) {
|
||||||
func TestFile(t *testing.T) {
|
func TestFile(t *testing.T) {
|
||||||
is := is.New(t)
|
is := is.New(t)
|
||||||
|
|
||||||
f := basicFile(t)
|
entries := entries {
|
||||||
|
{"key-1", 1},
|
||||||
|
{"key-2", 2},
|
||||||
|
{"key-3", 3},
|
||||||
|
{"longerkey-4", 65535},
|
||||||
|
}
|
||||||
|
|
||||||
|
f := basicFile(t, entries, entries, entries)
|
||||||
|
|
||||||
sf, err := ReadFile(f)
|
sf, err := ReadFile(f)
|
||||||
is.NoErr(err)
|
is.NoErr(err)
|
||||||
|
@ -52,20 +59,18 @@ func TestFile(t *testing.T) {
|
||||||
is.Equal(len(sf.segments), 3)
|
is.Equal(len(sf.segments), 3)
|
||||||
}
|
}
|
||||||
|
|
||||||
func basicFile(t *testing.T) fs.File {
|
func basicFile(t *testing.T, lis ...entries) fs.File {
|
||||||
t.Helper()
|
t.Helper()
|
||||||
|
|
||||||
data := segment{entries: entries{
|
segments := make([][]byte, len(lis))
|
||||||
{"key-1", 1},
|
var err error
|
||||||
{"key-2", 2},
|
for i, entries := range lis {
|
||||||
{"key-3", 3},
|
data := segment{entries: entries}
|
||||||
{"longerkey-4", 65535},
|
segments[i], err = data.MarshalBinary()
|
||||||
}}
|
|
||||||
|
|
||||||
b, err := data.MarshalBinary()
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Error(err)
|
t.Error(err)
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return NewFile(b, b, b)
|
|
||||||
|
return NewFile(segments...)
|
||||||
}
|
}
|
||||||
|
|
69
lsm/sst.go
69
lsm/sst.go
|
@ -1,5 +1,12 @@
|
||||||
// SPDX-FileCopyrightText: 2023 Jon Lundy <jon@xuu.cc>
|
// SPDX-FileCopyrightText: 2023 Jon Lundy <jon@xuu.cc>
|
||||||
// SPDX-License-Identifier: BSD-3-Clause
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
|
// lsm -- Log Structured Merge-Tree
|
||||||
|
//
|
||||||
|
// This is a basic LSM tree using an SSTable optimized for append-only writing. On disk, data is organized into time-ordered
|
||||||
|
// files of segments containing reverse-sorted keys. Each segment ends with a magic value `Souris\x01`, a 4-byte hash, a count of
|
||||||
|
// segment entries, and data length.
|
||||||
|
|
||||||
package lsm
|
package lsm
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -23,11 +30,11 @@ var (
|
||||||
)
|
)
|
||||||
|
|
||||||
type header struct {
|
type header struct {
|
||||||
sig []byte
|
sig []byte // 4Byte signature
|
||||||
entries uint64
|
entries uint64 // count of entries in segment
|
||||||
datalen uint64
|
datalen uint64 // length of data
|
||||||
headlen uint64
|
headlen uint64 // length of header
|
||||||
end int64
|
end int64 // location of end of data/start of header (start of data is `end - datalen`)
|
||||||
}
|
}
|
||||||
|
|
||||||
// ReadHead parse header from a segment. reads from the end of slice of length segmentFooterLength
|
// ReadHead parse header from a segment. reads from the end of slice of length segmentFooterLength
|
||||||
|
@ -173,8 +180,21 @@ func (s *segmentReader) FirstEntry() (*entryBytes, error) {
|
||||||
return e, err
|
return e, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *segmentReader) VerifyHash() (bool, error) {
|
||||||
|
h := hash()
|
||||||
|
data := make([]byte, s.head.datalen)
|
||||||
|
_, err := s.rd.ReadAt(data, s.head.end-int64(s.head.datalen))
|
||||||
|
if err != nil {
|
||||||
|
return false, err
|
||||||
|
}
|
||||||
|
_, err = h.Write(data)
|
||||||
|
ok := bytes.Equal(h.Sum(nil), s.head.sig)
|
||||||
|
|
||||||
|
return ok, err
|
||||||
|
}
|
||||||
|
|
||||||
// Find locates needle within a segment. if it cant find it will return the nearest key before needle.
|
// Find locates needle within a segment. if it cant find it will return the nearest key before needle.
|
||||||
func (s *segmentReader) Find(needle []byte) (*entryBytes, bool, error) {
|
func (s *segmentReader) Find(needle []byte, first bool) (*entryBytes, bool, error) {
|
||||||
if s == nil {
|
if s == nil {
|
||||||
return nil, false, nil
|
return nil, false, nil
|
||||||
}
|
}
|
||||||
|
@ -184,23 +204,27 @@ func (s *segmentReader) Find(needle []byte) (*entryBytes, bool, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
last := e
|
last := e
|
||||||
|
found := false
|
||||||
for pos > 0 {
|
for pos > 0 {
|
||||||
key, _ := e.KeyValue()
|
key, _ := e.KeyValue()
|
||||||
switch bytes.Compare(key, needle) {
|
switch bytes.Compare(key, needle) {
|
||||||
|
case 1: // key=ccc, needle=bbb
|
||||||
|
return last, found, nil
|
||||||
case 0: // equal
|
case 0: // equal
|
||||||
|
if first {
|
||||||
return e, true, nil
|
return e, true, nil
|
||||||
|
}
|
||||||
|
found = true
|
||||||
|
fallthrough
|
||||||
case -1: // key=aaa, needle=bbb
|
case -1: // key=aaa, needle=bbb
|
||||||
last = e
|
last = e
|
||||||
e, pos, err = s.readEntryAt(pos)
|
e, pos, err = s.readEntryAt(pos)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, false, err
|
return nil, found, err
|
||||||
}
|
|
||||||
|
|
||||||
case 1: // key=ccc, needle=bbb
|
|
||||||
return last, false, nil
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return last, false, nil
|
}
|
||||||
|
return last, found, nil
|
||||||
}
|
}
|
||||||
func (s *segmentReader) readEntryAt(pos int64) (*entryBytes, int64, error) {
|
func (s *segmentReader) readEntryAt(pos int64) (*entryBytes, int64, error) {
|
||||||
if pos < 0 {
|
if pos < 0 {
|
||||||
|
@ -217,7 +241,10 @@ func (s *segmentReader) readEntryAt(pos int64) (*entryBytes, int64, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
type logFile struct {
|
type logFile struct {
|
||||||
rd interface{io.ReaderAt; io.WriterTo}
|
rd interface {
|
||||||
|
io.ReaderAt
|
||||||
|
io.WriterTo
|
||||||
|
}
|
||||||
segments []segmentReader
|
segments []segmentReader
|
||||||
|
|
||||||
fs.File
|
fs.File
|
||||||
|
@ -232,7 +259,10 @@ func ReadFile(fd fs.File) (*logFile, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
eof := stat.Size()
|
eof := stat.Size()
|
||||||
if rd, ok := fd.(interface{io.ReaderAt; io.WriterTo}); ok {
|
if rd, ok := fd.(interface {
|
||||||
|
io.ReaderAt
|
||||||
|
io.WriterTo
|
||||||
|
}); ok {
|
||||||
l.rd = rd
|
l.rd = rd
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
@ -243,8 +273,8 @@ func ReadFile(fd fs.File) (*logFile, error) {
|
||||||
l.rd = bytes.NewReader(rd)
|
l.rd = bytes.NewReader(rd)
|
||||||
}
|
}
|
||||||
|
|
||||||
for eof > 0 {
|
|
||||||
head := make([]byte, segmentFooterLength)
|
head := make([]byte, segmentFooterLength)
|
||||||
|
for eof > 0 {
|
||||||
_, err = l.rd.ReadAt(head, eof-int64(segmentFooterLength))
|
_, err = l.rd.ReadAt(head, eof-int64(segmentFooterLength))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -285,7 +315,7 @@ func (l *logFile) LoadSegment(pos int64) (*segmentBytes, error) {
|
||||||
|
|
||||||
return &segmentBytes{b, -1}, nil
|
return &segmentBytes{b, -1}, nil
|
||||||
}
|
}
|
||||||
func (l *logFile) Find(needle []byte) (*entryBytes, bool, error) {
|
func (l *logFile) Find(needle []byte, first bool) (*entryBytes, bool, error) {
|
||||||
var last segmentReader
|
var last segmentReader
|
||||||
|
|
||||||
for _, s := range l.segments {
|
for _, s := range l.segments {
|
||||||
|
@ -294,13 +324,16 @@ func (l *logFile) Find(needle []byte) (*entryBytes, bool, error) {
|
||||||
return nil, false, err
|
return nil, false, err
|
||||||
}
|
}
|
||||||
k, _ := e.KeyValue()
|
k, _ := e.KeyValue()
|
||||||
if bytes.Compare(k, needle) > 0 {
|
if first && bytes.Compare(k, needle) >= 0 {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if !first && bytes.Compare(k, needle) > 0 {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
last = s
|
last = s
|
||||||
}
|
}
|
||||||
|
|
||||||
return last.Find(needle)
|
return last.Find(needle, first)
|
||||||
}
|
}
|
||||||
func (l *logFile) WriteTo(w io.Writer) (int64, error) {
|
func (l *logFile) WriteTo(w io.Writer) (int64, error) {
|
||||||
return l.rd.WriteTo(w)
|
return l.rd.WriteTo(w)
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
// SPDX-FileCopyrightText: 2023 Jon Lundy <jon@xuu.cc>
|
// SPDX-FileCopyrightText: 2023 Jon Lundy <jon@xuu.cc>
|
||||||
// SPDX-License-Identifier: BSD-3-Clause
|
// SPDX-License-Identifier: BSD-3-Clause
|
||||||
|
|
||||||
package lsm
|
package lsm
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -39,19 +40,19 @@ func TestLargeFile(t *testing.T) {
|
||||||
}
|
}
|
||||||
t.Log(f.Stat())
|
t.Log(f.Stat())
|
||||||
|
|
||||||
tt, ok, err := sf.Find(needle)
|
tt, ok, err := sf.Find(needle, false)
|
||||||
is.NoErr(err)
|
is.NoErr(err)
|
||||||
is.True(ok)
|
is.True(ok)
|
||||||
key, val := tt.KeyValue()
|
key, val := tt.KeyValue()
|
||||||
t.Log(string(key), val)
|
t.Log(string(key), val)
|
||||||
|
|
||||||
tt, ok, err = sf.Find([]byte("needle"))
|
tt, ok, err = sf.Find([]byte("needle"), false)
|
||||||
is.NoErr(err)
|
is.NoErr(err)
|
||||||
is.True(!ok)
|
is.True(!ok)
|
||||||
key, val = tt.KeyValue()
|
key, val = tt.KeyValue()
|
||||||
t.Log(string(key), val)
|
t.Log(string(key), val)
|
||||||
|
|
||||||
tt, ok, err = sf.Find([]byte{'\xff'})
|
tt, ok, err = sf.Find([]byte{'\xff'}, false)
|
||||||
is.NoErr(err)
|
is.NoErr(err)
|
||||||
is.True(!ok)
|
is.True(!ok)
|
||||||
key, val = tt.KeyValue()
|
key, val = tt.KeyValue()
|
||||||
|
@ -85,23 +86,28 @@ func TestLargeFileDisk(t *testing.T) {
|
||||||
is.NoErr(err)
|
is.NoErr(err)
|
||||||
k, v := e.KeyValue()
|
k, v := e.KeyValue()
|
||||||
needle = k
|
needle = k
|
||||||
t.Logf("Segment-%d: %s = %d", i, k, v)
|
|
||||||
|
ok, err := s.VerifyHash()
|
||||||
|
is.NoErr(err)
|
||||||
|
|
||||||
|
t.Logf("Segment-%d: %s = %d %t", i, k, v, ok)
|
||||||
|
is.True(ok)
|
||||||
}
|
}
|
||||||
t.Log(f.Stat())
|
t.Log(f.Stat())
|
||||||
|
|
||||||
tt, ok, err := sf.Find(needle)
|
tt, ok, err := sf.Find(needle, false)
|
||||||
is.NoErr(err)
|
is.NoErr(err)
|
||||||
is.True(ok)
|
is.True(ok)
|
||||||
key, val := tt.KeyValue()
|
key, val := tt.KeyValue()
|
||||||
t.Log(string(key), val)
|
t.Log(string(key), val)
|
||||||
|
|
||||||
tt, ok, err = sf.Find([]byte("needle"))
|
tt, ok, err = sf.Find([]byte("needle"), false)
|
||||||
is.NoErr(err)
|
is.NoErr(err)
|
||||||
is.True(!ok)
|
is.True(!ok)
|
||||||
key, val = tt.KeyValue()
|
key, val = tt.KeyValue()
|
||||||
t.Log(string(key), val)
|
t.Log(string(key), val)
|
||||||
|
|
||||||
tt, ok, err = sf.Find([]byte{'\xff'})
|
tt, ok, err = sf.Find([]byte{'\xff'}, false)
|
||||||
is.NoErr(err)
|
is.NoErr(err)
|
||||||
is.True(!ok)
|
is.True(!ok)
|
||||||
key, val = tt.KeyValue()
|
key, val = tt.KeyValue()
|
||||||
|
@ -133,7 +139,7 @@ func BenchmarkLargeFile(b *testing.B) {
|
||||||
if each > 0 && n%each == 0 {
|
if each > 0 && n%each == 0 {
|
||||||
b.Log(n)
|
b.Log(n)
|
||||||
}
|
}
|
||||||
_, ok, err := sf.Find(keys[n])
|
_, ok, err := sf.Find(keys[n], false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
b.Error(err)
|
b.Error(err)
|
||||||
}
|
}
|
||||||
|
@ -144,40 +150,48 @@ func BenchmarkLargeFile(b *testing.B) {
|
||||||
b.Log("okays=", b.N, okays)
|
b.Log("okays=", b.N, okays)
|
||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkLargeFileB(b *testing.B) {
|
// TestFindRange is an initial range search that finds the start and stop entries for a range of needles.
|
||||||
segCount := 4098 / 16
|
// TODO: start the second query from where the first left off. Use an iterator?
|
||||||
f := randFile(b, 2_000_000, segCount)
|
func TestFindRange(t *testing.T) {
|
||||||
|
is := is.New(t)
|
||||||
|
|
||||||
|
f := basicFile(t,
|
||||||
|
entries{
|
||||||
|
{"AD", 5},
|
||||||
|
{"AC", 5},
|
||||||
|
{"AB", 4},
|
||||||
|
{"AB", 3},
|
||||||
|
},
|
||||||
|
entries{
|
||||||
|
{"AB", 2},
|
||||||
|
{"AA", 1},
|
||||||
|
},
|
||||||
|
)
|
||||||
sf, err := ReadFile(f)
|
sf, err := ReadFile(f)
|
||||||
if err != nil {
|
is.NoErr(err)
|
||||||
b.Error(err)
|
|
||||||
}
|
var ok bool
|
||||||
key := make([]byte, 5)
|
var first, last *entryBytes
|
||||||
keys := make([][]byte, b.N)
|
|
||||||
for i := range keys {
|
first, ok, err = sf.Find([]byte("AB"), true)
|
||||||
_, err = crand.Read(key)
|
is.NoErr(err)
|
||||||
if err != nil {
|
|
||||||
b.Error(err)
|
key, val := first.KeyValue()
|
||||||
}
|
t.Log(string(key), val)
|
||||||
keys[i] = []byte(base64.RawURLEncoding.EncodeToString(key))
|
|
||||||
}
|
is.True(ok)
|
||||||
b.Log("ready", b.N)
|
is.Equal(key, []byte("AB"))
|
||||||
b.ResetTimer()
|
is.Equal(val, uint64(2))
|
||||||
okays := 0
|
|
||||||
each := b.N / 10
|
last, ok, err = sf.Find([]byte("AC"), false)
|
||||||
for n := 0; n < b.N; n++ {
|
is.NoErr(err)
|
||||||
if each > 0 && n%each == 0 {
|
|
||||||
b.Log(n)
|
key, val = last.KeyValue()
|
||||||
}
|
t.Log(string(key), val)
|
||||||
_, ok, err := sf.Find(keys[n])
|
|
||||||
if err != nil {
|
is.True(ok)
|
||||||
b.Error(err)
|
is.Equal(key, []byte("AC"))
|
||||||
}
|
is.Equal(val, uint64(5))
|
||||||
if ok {
|
|
||||||
okays++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
b.Log("okays=", b.N, okays)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func randFile(t interface {
|
func randFile(t interface {
|
||||||
|
|
Loading…
Reference in New Issue
Block a user