chore: add flatbuffers support

This commit is contained in:
xuu
2025-03-03 22:10:47 -07:00
parent 7b884ca2f9
commit 7ff278e0cf
31 changed files with 2321 additions and 165 deletions

2
twt-avro/example.txt Normal file
View File

@@ -0,0 +1,2 @@
example https://example.org/twtxt.txt 2016-02-06T21:32:02.000Z @erlehmann is messing with timestamps in @buckket #twtxt :)
example https://example.org/twtxt.txt 2016-02-06T12:14:18.000Z Simple nodejs script to convert your twitter timeline to twtxt: https://t.co/txnWsC5jvA ( find my #twtxt at https://t.co/uN1KDXwJ8B )

View File

@@ -192,8 +192,8 @@ func (o *Code) Marshal() ([]byte, error) {
return avro.Marshal(o.Schema(), o)
}
// Lextwt is a generated struct.
type Lextwt struct {
// Twt is a generated struct.
type Twt struct {
Nick string `avro:"nick"`
URI string `avro:"uri"`
Created int64 `avro:"created"`
@@ -202,19 +202,66 @@ type Lextwt struct {
Msg []any `avro:"msg"`
}
var schemaLextwt = avro.MustParse(`{"name":"social.yarn.lextwt.lextwt","type":"record","fields":[{"name":"nick","type":"string"},{"name":"uri","type":"string"},{"name":"created","type":"long"},{"name":"createdZone","type":"string"},{"name":"createdOffset","type":"int"},{"name":"msg","type":{"type":"array","items":["null","string",{"name":"social.yarn.lextwt.linesep","type":"record","fields":[]},{"name":"social.yarn.lextwt.comment","type":"record","fields":[{"name":"comment","type":"string"},{"name":"key","type":"string"},{"name":"value","type":"string"}]},{"name":"social.yarn.lextwt.mention","type":"record","fields":[{"name":"name","type":"string"},{"name":"domain","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.bangmention","type":"record","fields":[{"name":"name","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.hashtag","type":"record","fields":[{"name":"tag","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.subject","type":"record","fields":[{"name":"subject","type":"string"},{"name":"tag","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.link","type":"record","fields":[{"name":"linkType","type":"int"},{"name":"text","type":"string"},{"name":"target","type":"string"},{"name":"title","type":"string"}]},{"name":"social.yarn.lextwt.code","type":"record","fields":[{"name":"code","type":"string"},{"name":"codetype","type":"int"}]}]}}]}`)
var schemaTwt = avro.MustParse(`{"name":"social.yarn.lextwt.twt","type":"record","fields":[{"name":"nick","type":"string"},{"name":"uri","type":"string"},{"name":"created","type":"long"},{"name":"createdZone","type":"string"},{"name":"createdOffset","type":"int"},{"name":"msg","type":{"type":"array","items":["null","string",{"name":"social.yarn.lextwt.linesep","type":"record","fields":[]},{"name":"social.yarn.lextwt.comment","type":"record","fields":[{"name":"comment","type":"string"},{"name":"key","type":"string"},{"name":"value","type":"string"}]},{"name":"social.yarn.lextwt.mention","type":"record","fields":[{"name":"name","type":"string"},{"name":"domain","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.bangmention","type":"record","fields":[{"name":"name","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.hashtag","type":"record","fields":[{"name":"tag","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.subject","type":"record","fields":[{"name":"subject","type":"string"},{"name":"tag","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.link","type":"record","fields":[{"name":"linkType","type":"int"},{"name":"text","type":"string"},{"name":"target","type":"string"},{"name":"title","type":"string"}]},{"name":"social.yarn.lextwt.code","type":"record","fields":[{"name":"code","type":"string"},{"name":"codetype","type":"int"}]}]}}]}`)
// Schema returns the schema for Lextwt.
func (o *Lextwt) Schema() avro.Schema {
return schemaLextwt
// Schema returns the schema for Twt.
func (o *Twt) Schema() avro.Schema {
return schemaTwt
}
// Unmarshal decodes b into the receiver.
func (o *Lextwt) Unmarshal(b []byte) error {
func (o *Twt) Unmarshal(b []byte) error {
return avro.Unmarshal(o.Schema(), b, o)
}
// Marshal encodes the receiver.
func (o *Lextwt) Marshal() ([]byte, error) {
func (o *Twt) Marshal() ([]byte, error) {
return avro.Marshal(o.Schema(), o)
}
// Twter is a generated struct.
// It is the Go form of the "social.yarn.lextwt.twter" Avro record; the avro
// struct tags bind each field to the schema field of the same name.
type Twter struct {
// Nick is stored in the schema's "nick" string field.
Nick string `avro:"nick"`
// URI is stored in the schema's "uri" string field.
URI string `avro:"uri"`
}
// schemaTwter is the parsed Avro record schema for Twter (two string fields,
// "nick" and "uri"). It is built once when the package is initialized.
// NOTE(review): avro.MustParse presumably panics on an invalid schema, as
// Must-prefixed helpers do by convention — confirm against hamba/avro.
var schemaTwter = avro.MustParse(`{"name":"social.yarn.lextwt.twter","type":"record","fields":[{"name":"nick","type":"string"},{"name":"uri","type":"string"}]}`)
// Schema reports the package-level Avro schema that describes a Twter record.
// The same parsed schema value is handed back on every call.
func (o *Twter) Schema() avro.Schema {
	return schemaTwter
}
// Unmarshal decodes the Avro-encoded bytes in b into the receiver using the
// Twter schema. The error from avro.Unmarshal is returned as is.
func (o *Twter) Unmarshal(b []byte) error {
	schema := o.Schema()
	return avro.Unmarshal(schema, b, o)
}
// Marshal serializes the receiver with the Twter schema and returns the
// encoded bytes together with any error reported by avro.Marshal.
func (o *Twter) Marshal() ([]byte, error) {
	schema := o.Schema()
	return avro.Marshal(schema, o)
}
// Registry is a generated struct.
// It is the Go form of the "social.yarn.lextwt.registry" Avro record: three
// arrays holding the twts, the twters and the preamble comments.
type Registry struct {
// Twts is stored in the schema's "twts" array of twt records.
Twts []Twt `avro:"twts"`
// Twters is stored in the schema's "twters" array of twter records.
Twters []Twter `avro:"twters"`
// Preamble is stored in the schema's "preamble" array of comment records.
Preamble []Comment `avro:"preamble"`
}
// schemaRegistry is the parsed Avro record schema for Registry. The twt and
// twter records are declared inline inside their arrays; the "preamble" array
// refers to "social.yarn.lextwt.comment" by name only, reusing the comment
// record that is declared earlier in the same schema inside the twt "msg" union.
var schemaRegistry = avro.MustParse(`{"name":"social.yarn.lextwt.registry","type":"record","fields":[{"name":"twts","type":{"type":"array","items":{"name":"social.yarn.lextwt.twt","type":"record","fields":[{"name":"nick","type":"string"},{"name":"uri","type":"string"},{"name":"created","type":"long"},{"name":"createdZone","type":"string"},{"name":"createdOffset","type":"int"},{"name":"msg","type":{"type":"array","items":["null","string",{"name":"social.yarn.lextwt.linesep","type":"record","fields":[]},{"name":"social.yarn.lextwt.comment","type":"record","fields":[{"name":"comment","type":"string"},{"name":"key","type":"string"},{"name":"value","type":"string"}]},{"name":"social.yarn.lextwt.mention","type":"record","fields":[{"name":"name","type":"string"},{"name":"domain","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.bangmention","type":"record","fields":[{"name":"name","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.hashtag","type":"record","fields":[{"name":"tag","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.subject","type":"record","fields":[{"name":"subject","type":"string"},{"name":"tag","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.link","type":"record","fields":[{"name":"linkType","type":"int"},{"name":"text","type":"string"},{"name":"target","type":"string"},{"name":"title","type":"string"}]},{"name":"social.yarn.lextwt.code","type":"record","fields":[{"name":"code","type":"string"},{"name":"codetype","type":"int"}]}]}}]}}},{"name":"twters","type":{"type":"array","items":{"name":"social.yarn.lextwt.twter","type":"record","fields":[{"name":"nick","type":"string"},{"name":"uri","type":"string"}]}}},{"name":"preamble","type":{"type":"array","items":"social.yarn.lextwt.comment"}}]}`)
// Schema reports the package-level Avro schema that describes a Registry
// record. The same parsed schema value is handed back on every call.
func (o *Registry) Schema() avro.Schema {
	return schemaRegistry
}
// Unmarshal decodes the Avro-encoded bytes in b into the receiver using the
// Registry schema. The error from avro.Unmarshal is returned as is.
func (o *Registry) Unmarshal(b []byte) error {
	schema := o.Schema()
	return avro.Unmarshal(schema, b, o)
}
// Marshal serializes the receiver with the Registry schema and returns the
// encoded bytes together with any error reported by avro.Marshal.
func (o *Registry) Marshal() ([]byte, error) {
	schema := o.Schema()
	return avro.Marshal(schema, o)
}

View File

@@ -1,6 +1,7 @@
package twt_avro
import (
"io"
"time"
"github.com/hamba/avro"
@@ -18,6 +19,9 @@ const (
keyLinesep = "social.yarn.lextwt.linesep"
keyComment = "social.yarn.lextwt.comment"
keyText = "social.yarn.lextwt.text"
keyTwt = "social.yarn.lextwt.twt"
keyTwter = "social.yarn.lextwt.twter"
keyRegistry = "social.yarn.lextwt.registry"
)
func Register() {
@@ -30,6 +34,9 @@ func Register() {
avro.Register(keyBangmention, Bangmention{})
avro.Register(keyLinesep, Linesep{})
avro.Register(keyComment, Comment{})
avro.Register(keyTwt, Twt{})
avro.Register(keyTwter, Twter{})
avro.Register(keyRegistry, Registry{})
}
func Elem(o any) any {
@@ -62,18 +69,18 @@ func Msg(items ...any) []any {
return items
}
func FromTwt(twt types.Twt) Lextwt {
func FromTwt(twt types.Twt) Twt {
return FromLextwt(twt.(*lextwt.Twt))
}
func FromLextwt(twt *lextwt.Twt) Lextwt {
func FromLextwt(twt *lextwt.Twt) Twt {
if twt == nil {
return Lextwt{}
return Twt{}
}
ts := twt.Created()
_, offset := ts.Zone()
l := Lextwt{
l := Twt{
Nick: twt.Twter().Nick,
URI: twt.Twter().URI,
Created: ts.UnixMilli(),
@@ -95,7 +102,7 @@ func FromLextwt(twt *lextwt.Twt) Lextwt {
case *lextwt.Subject:
l.Msg = append(l.Msg, Elem(Subject{Subject: e.Subject(), Tag: e.Tag().Text(), Target: e.Tag().Target()}))
case *lextwt.Code:
l.Msg = append(l.Msg, Elem(Code{Code: e.Text()}))
l.Msg = append(l.Msg, Elem(Code{Code: e.Text(), Codetype: int(e.CodeType())}))
case *lextwt.Mention:
l.Msg = append(l.Msg, Elem(Mention{Name: e.Name(), Domain: e.Domain(), Target: e.Target()}))
case *lextwt.Tag:
@@ -114,11 +121,11 @@ func FromLextwt(twt *lextwt.Twt) Lextwt {
return l
}
func (l Lextwt) ToTwt() types.Twt {
func (l Twt) ToTwt() types.Twt {
return l.ToLextwt()
}
func (lx Lextwt) ToLextwt() *lextwt.Twt {
func (lx Twt) ToLextwt() *lextwt.Twt {
twter := types.Twter{
Nick: lx.Nick,
URI: lx.URI,
@@ -213,3 +220,60 @@ func read[T any](m map[string]any, k string) T {
}
return val
}
// TwtRegistry is the view of a registry that this package encodes and
// decodes. EncodeRegistry reads Preamble, Twters and Twts from it, and
// DecodeRegistry returns the value built by lextwt.NewTwtRegistry as one.
type TwtRegistry interface {
// Twters returns the twters known to the registry.
Twters() []*types.Twter
// Preamble returns the comments that make up the registry preamble.
Preamble() lextwt.Comments
// Twts returns the twts held by the registry.
Twts() types.Twts
// WriteTo writes the registry to w; it has the io.WriterTo signature.
WriteTo(w io.Writer) (int64, error)
}
// EncodeRegistry converts r into a Registry record and returns its Avro
// binary encoding, or the error reported by Registry.Marshal.
//
// Every preamble comment keeps its full text. Comments that also carry a key
// additionally store the key and value, because DecodeRegistry hands all three
// back to lextwt.NewCommentValue; dropping the text here would make such
// comments come back empty after a round trip.
//
// Nil entries in r.Twters() are skipped. Each twt is converted with FromTwt,
// which asserts that the value is a *lextwt.Twt and panics otherwise.
func EncodeRegistry(r TwtRegistry) ([]byte, error) {
	preamble := r.Preamble()
	twters := r.Twters()
	twts := r.Twts()

	out := Registry{
		Twts:     make([]Twt, 0, len(twts)),
		Twters:   make([]Twter, 0, len(twters)),
		Preamble: make([]Comment, 0, len(preamble)),
	}

	for _, comment := range preamble {
		c := Comment{Comment: comment.Text()}
		if key := comment.Key(); key != "" {
			c.Key = key
			c.Value = comment.Value()
		}
		out.Preamble = append(out.Preamble, c)
	}

	for _, twter := range twters {
		if twter == nil {
			// Nothing to encode; dereferencing would panic.
			continue
		}
		out.Twters = append(out.Twters, Twter{Nick: twter.Nick, URI: twter.URI})
	}

	for _, twt := range twts {
		out.Twts = append(out.Twts, FromTwt(twt))
	}

	return out.Marshal()
}
// DecodeRegistry decodes the Avro binary data in b as a Registry record and
// rebuilds a TwtRegistry from it with lextwt.NewTwtRegistry.
//
// It returns a nil registry and the decoding error when b cannot be
// unmarshalled. Preamble comments that carry a key are rebuilt with
// lextwt.NewCommentValue and the rest with lextwt.NewComment.
func DecodeRegistry(b []byte) (TwtRegistry, error) {
	var out Registry
	if err := out.Unmarshal(b); err != nil {
		return nil, err
	}

	// The slices start at length zero with capacity reserved. Allocating them
	// with a non-zero length and then appending would leave that many zero or
	// nil entries in front of the decoded values.
	preamble := make(lextwt.Comments, 0, len(out.Preamble))
	for _, comment := range out.Preamble {
		if comment.Key != "" {
			preamble = append(preamble, lextwt.NewCommentValue(comment.Comment, comment.Key, comment.Value))
			continue
		}
		preamble = append(preamble, lextwt.NewComment(comment.Comment))
	}

	twters := make([]*types.Twter, 0, len(out.Twters))
	for _, twter := range out.Twters {
		twters = append(twters, &types.Twter{Nick: twter.Nick, URI: twter.URI})
	}

	twts := make(types.Twts, 0, len(out.Twts))
	for _, twt := range out.Twts {
		twts = append(twts, twt.ToTwt())
	}

	return lextwt.NewTwtRegistry(twters, preamble, twts), nil
}

View File

@@ -1,9 +1,12 @@
package twt_avro_test
import (
"bytes"
_ "embed"
"encoding/base64"
"fmt"
"log"
"os"
"testing"
"time"
@@ -18,7 +21,7 @@ func TestMarshal(t *testing.T) {
ts := must(time.Parse(time.RFC3339, "2021-01-24T02:19:54Z"))
zone, offset := ts.Zone()
in := twt_avro.Lextwt{
in := twt_avro.Twt{
Nick: "xuu",
URI: "https://xuu.txt",
Created: ts.UnixMilli(),
@@ -43,7 +46,7 @@ func TestMarshal(t *testing.T) {
fmt.Println(data)
// Outputs: [54 6 102 111 111]
out := twt_avro.Lextwt{}
out := twt_avro.Twt{}
err = out.Unmarshal(data)
if err != nil {
log.Fatal("unmarshal: ", err)
@@ -93,7 +96,7 @@ func TestFromTwt(t *testing.T) {
t.Log(enc(b))
out := twt_avro.Lextwt{}
out := twt_avro.Twt{}
err = out.Unmarshal(b)
if err != nil {
log.Fatal(err)
@@ -103,7 +106,7 @@ func TestFromTwt(t *testing.T) {
outlx := out.ToLextwt()
outText := fmt.Sprintf("%+l",outlx)
outText := fmt.Sprintf("%+l", outlx)
t.Log(outText)
if outText != test.in {
t.Errorf("\nexpected %s\n got %s", test.in, outText)
@@ -113,3 +116,29 @@ func TestFromTwt(t *testing.T) {
}
// enc renders bytes as unpadded standard base64 so the tests can log encoded
// payloads as text.
var enc = base64.RawStdEncoding.EncodeToString
// input holds the bytes of example.txt, embedded at build time; it is the
// registry fixture parsed by TestEncodeRegistry.
//
//go:embed example.txt
var input []byte
// TestEncodeRegistry parses the embedded example registry, encodes it with
// EncodeRegistry, decodes the result with DecodeRegistry and writes the
// decoded registry to standard output for inspection.
//
// Failures go through t.Fatalf rather than log.Fatal, which would exit the
// whole test binary instead of failing only this test.
func TestEncodeRegistry(t *testing.T) {
	twt_avro.Register()

	registry, err := lextwt.ParseRegistry(bytes.NewReader(input))
	if err != nil {
		t.Fatalf("parse registry: %v", err)
	}

	b, err := twt_avro.EncodeRegistry(registry)
	if err != nil {
		t.Fatalf("encode registry: %v", err)
	}
	t.Log(enc(b))

	out, err := twt_avro.DecodeRegistry(b)
	if err != nil {
		t.Fatalf("decode registry: %v", err)
	}

	if _, err := out.WriteTo(os.Stdout); err != nil {
		t.Fatalf("write decoded registry: %v", err)
	}
}