go-pkg/lsm/sst_test.go

package lsm

import (
	"bytes"
	"encoding/base64"
	"errors"
	"io"
	"iter"
	"slices"
	"testing"

	"github.com/docopt/docopt-go"
	"github.com/matryer/is"
)

// TestWriteLogFile tests WriteLogFile and AppendLogFile against a set of test cases.
//
// Each test case contains a slice of slices of io.Readers: the first slice is
// passed to WriteLogFile and any remaining slices are passed to AppendLogFile,
// in order. The test case also contains the expected encoded output as a
// base64 string, the expected output when the file is read back using
// ReadLogFile, and the expected output when the file is read back in reverse
// order using ReadLogFile.Rev().
//
// The test cases are as follows:
//
//   - nil reader: passes a nil slice of io.Readers to WriteLogFile.
//   - err reader: passes a slice of io.Readers to WriteLogFile which returns an
//     error when read.
//   - single reader: passes a single io.Reader to WriteLogFile.
//   - multiple readers: passes a slice of multiple io.Readers to WriteLogFile.
//   - multiple commit: passes multiple slices of io.Readers to AppendLogFile.
//   - multiple commit 3x: passes multiple slices of io.Readers to AppendLogFile
//     three times.
//
// The test uses the is package from github.com/matryer/is to check that the
// output matches the expected output.
func TestWriteLogFile(t *testing.T) {
	type test struct {
		name string
		in   [][]io.Reader
		enc  string
		out  [][]byte
		rev  [][]byte
	}

	tests := []test{
		{
			name: "nil reader",
			in:   nil,
			enc:  "U291ci5pcwAAAwACAA",
			out:  [][]byte{},
			rev:  [][]byte{},
		},
		{
			name: "err reader",
			in:   nil,
			enc:  "U291ci5pcwAAAwACAA",
			out:  [][]byte{},
			rev:  [][]byte{},
		},
		{
			name: "single reader",
			in: [][]io.Reader{
				{bytes.NewBuffer([]byte{1, 2, 3, 4})},
			},
			enc: "U291ci5pcwAAE756XndRZXhdAAYBAgMEAQQBAhA",
			out: [][]byte{{1, 2, 3, 4}},
			rev: [][]byte{{1, 2, 3, 4}},
		},
		{
			name: "multiple readers",
			in: [][]io.Reader{
				{
					bytes.NewBuffer([]byte{1, 2, 3, 4}),
					bytes.NewBuffer([]byte{5, 6, 7, 8}),
				},
			},
			enc: "U291ci5pcwAAI756XndRZXhdAAYBAgMEAQRhQyZWDDn5BQAGBQYHCAEEAgIg",
			out: [][]byte{{1, 2, 3, 4}, {5, 6, 7, 8}},
			rev: [][]byte{{5, 6, 7, 8}, {1, 2, 3, 4}},
		},
		{
			name: "multiple commit",
			in: [][]io.Reader{
				{bytes.NewBuffer([]byte{1, 2, 3, 4})},
				{bytes.NewBuffer([]byte{5, 6, 7, 8})},
			},
			enc: "U291ci5pcwAAJr56XndRZXhdAAYBAgMEAQQBAhBhQyZWDDn5BQAGBQYHCAEEAgIQ",
			out: [][]byte{{1, 2, 3, 4}, {5, 6, 7, 8}},
			rev: [][]byte{{5, 6, 7, 8}, {1, 2, 3, 4}},
		},
		{
			name: "multiple commit",
			in: [][]io.Reader{
				{
					bytes.NewBuffer([]byte{1, 2, 3, 4}),
					bytes.NewBuffer([]byte{5, 6, 7, 8}),
				},
				{bytes.NewBuffer([]byte{9, 10, 11, 12})},
			},
			enc: "U291ci5pcwAANr56XndRZXhdAAYBAgMEAQRhQyZWDDn5BQAGBQYHCAEEAgIgA4Buuio8Ro0ABgkKCwwBBAMCEA",
			out: [][]byte{{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10, 11, 12}},
			rev: [][]byte{{9, 10, 11, 12}, {5, 6, 7, 8}, {1, 2, 3, 4}},
		},
		{
			name: "multiple commit 3x",
			in: [][]io.Reader{
				{
					bytes.NewBuffer([]byte{1, 2, 3}),
					bytes.NewBuffer([]byte{4, 5, 6}),
				},
				{
					bytes.NewBuffer([]byte{7, 8, 9}),
				},
				{
					bytes.NewBuffer([]byte{10, 11, 12}),
					bytes.NewBuffer([]byte{13, 14, 15}),
				},
			},
			enc: "U291ci5pcwAAVNCqYhhnLPWrAAUBAgMBA7axWhhYd+HsAAUEBQYBAwICHr9ryhhdbkEZAAUHCAkBAwMCDy/UIhidCwCqAAUKCwwBA/NCwhh6wXgXAAUNDg8BAwUCHg",
			out: [][]byte{{1, 2, 3}, {4, 5, 6}, {7, 8, 9}, {10, 11, 12}, {13, 14, 15}},
			rev: [][]byte{{13, 14, 15}, {10, 11, 12}, {7, 8, 9}, {4, 5, 6}, {1, 2, 3}},
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			is := is.New(t)

			buf := &buffer{}
			buffers := 0

			// The first group of readers goes through WriteLogFile; any
			// subsequent groups are appended with AppendLogFile.
			if len(test.in) == 0 {
				err := WriteLogFile(buf, slices.Values([]io.Reader{}))
				is.NoErr(err)
			}
			for i, in := range test.in {
				buffers += len(in)
				if i == 0 {
					err := WriteLogFile(buf, slices.Values(in))
					is.NoErr(err)
				} else {
					err := AppendLogFile(buf, slices.Values(in))
					is.NoErr(err)
				}
			}

			is.Equal(base64.RawStdEncoding.EncodeToString(buf.Bytes()), test.enc)

			files, err := ReadLogFile(bytes.NewReader(buf.Bytes()))
			is.NoErr(err)
			is.Equal(files.Size(), uint64(len(buf.Bytes())))

			// Read the entries back in order, checking contents and hashes.
			i := 0
			for bi, fp := range files.Iter(0) {
				buf, err := io.ReadAll(fp)
				is.NoErr(err)

				hash := hash()
				hash.Write(buf)

				is.Equal(bi.Hash, hash.Sum(nil)[:len(bi.Hash)])
				is.True(len(test.out) > int(bi.Index))
				is.Equal(buf, test.out[bi.Index])
				i++
			}
			is.NoErr(files.Err)
			is.Equal(i, buffers)

			// Read the entries back in reverse and check they match rev.
			i = 0
			for bi, fp := range files.Rev(files.Count()) {
				buf, err := io.ReadAll(fp)
				is.NoErr(err)

				hash := hash()
				hash.Write(buf)

				is.Equal(bi.Hash, hash.Sum(nil)[:len(bi.Hash)])
				is.Equal(buf, test.rev[i])
				is.Equal(buf, test.out[bi.Index])
				i++
			}
			is.NoErr(files.Err)
			is.Equal(i, buffers)
			is.Equal(files.Count(), uint64(i))
		})
	}
}

// TestArgs tests that the CLI arguments are correctly parsed.
func TestArgs(t *testing.T) {
	is := is.New(t)

	usage := `Usage: lsm2 create <archive> <files>...`

	arguments, err := docopt.ParseArgs(usage, []string{"create", "archive", "file1", "file2"}, "1.0")
	is.NoErr(err)

	var params struct {
		Create  bool     `docopt:"create"`
		Archive string   `docopt:"<archive>"`
		Files   []string `docopt:"<files>"`
	}

	err = arguments.Bind(&params)
	is.NoErr(err)

	is.Equal(params.Create, true)
	is.Equal(params.Archive, "archive")
	is.Equal(params.Files, []string{"file1", "file2"})
}
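
// BenchmarkIterate writes b.N 1 KiB blocks to an in-memory log file, then
// benchmarks reading back the first entry in forward and reverse order.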
func BenchmarkIterate(b *testing.B) {
	block := make([]byte, 1024)
	buf := &buffer{}

	b.Run("write", func(b *testing.B) {
		WriteLogFile(buf, func(yield func(io.Reader) bool) {
			for range b.N {
				if !yield(bytes.NewBuffer(block)) {
					break
				}
			}
		})
	})

	b.Run("read", func(b *testing.B) {
		lf, _ := ReadLogFile(buf)
		b.Log(lf.Count())
		for range b.N {
			for _, fp := range lf.Iter(0) {
				_, _ = io.Copy(io.Discard, fp)
				break
			}
		}
	})

	b.Run("rev", func(b *testing.B) {
		lf, _ := ReadLogFile(buf)
		b.Log(lf.Count())
		for range b.N {
			for _, fp := range lf.Rev(lf.Count()) {
				_, _ = io.Copy(io.Discard, fp)
				break
			}
		}
	})
}
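
// buffer is a minimal in-memory implementation of io.WriterAt and io.ReaderAt,
// used as the backing store for log files in these tests.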
type buffer struct {
	buf []byte
}

// Bytes returns the underlying byte slice of the buffer.
func (b *buffer) Bytes() []byte {
	return b.buf
}

// WriteAt implements io.WriterAt. It grows the internal buffer as needed so
// that data can be copied in at the given offset, and returns an error if the
// offset is negative.
func (b *buffer) WriteAt(data []byte, offset int64) (written int, err error) {
	if offset < 0 {
		return 0, errors.New("negative offset")
	}

	// Grow the buffer if the write extends past its current length.
	currentLength := int64(len(b.buf))
	if currentLength < offset+int64(len(data)) {
		b.buf = append(b.buf, make([]byte, offset+int64(len(data))-currentLength)...)
	}

	written = copy(b.buf[offset:], data)
	return
}

// ReadAt implements io.ReaderAt. It reads data from the internal buffer
// starting at the specified offset and copies it into the provided slice. If
// the offset is negative it returns an error, and if the requested read
// extends beyond the buffer's length it returns the data read so far along
// with io.EOF.
func (b *buffer) ReadAt(data []byte, offset int64) (int, error) {
	if offset < 0 {
		return 0, errors.New("negative offset")
	}

	// Reading at or past the end of the buffer yields nothing but io.EOF.
	if offset >= int64(len(b.buf)) {
		return 0, io.EOF
	}

	if len(b.buf[offset:]) < len(data) {
		return copy(data, b.buf[offset:]), io.EOF
	}

	return copy(data, b.buf[offset:]), nil
}

// IterOne takes an iterator that yields values of type T along with a value of
// type I, and returns an iterator that yields only the values of type T. It
// discards the values of type I.
func IterOne[I, T any](it iter.Seq2[I, T]) iter.Seq[T] {
	return func(yield func(T) bool) {
		for _, v := range it {
			if !yield(v) {
				return
			}
		}
	}
}
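
// A minimal sketch of how IterOne can be used (assuming lf is the value
// returned by ReadLogFile, as in BenchmarkIterate above): it adapts the
// two-value iterator from Iter into a single-value iterator over just the
// entry readers.
//
//	for fp := range IterOne(lf.Iter(0)) {
//		_, _ = io.Copy(io.Discard, fp)
//	}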