chore: add avro twt encoders

This commit is contained in:
xuu
2025-03-02 20:14:37 -07:00
commit 7b884ca2f9
8 changed files with 944 additions and 0 deletions

220
twt-avro/gen.go Normal file
View File

@@ -0,0 +1,220 @@
// Code generated by avro/gen. DO NOT EDIT.
package twt_avro
import (
"github.com/hamba/avro/v2"
)
// Linesep is the Avro record "social.yarn.lextwt.linesep". It has no fields;
// its presence in a message is the whole payload.
type Linesep struct{}

// schemaLinesep is the parsed Avro schema backing Linesep.
var schemaLinesep = avro.MustParse(`{"name":"social.yarn.lextwt.linesep","type":"record","fields":[]}`)

// Schema reports the Avro schema that describes Linesep.
func (ls *Linesep) Schema() avro.Schema {
	return schemaLinesep
}

// Unmarshal decodes b into the receiver using the Linesep schema.
func (ls *Linesep) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaLinesep, b, ls)
}

// Marshal encodes the receiver using the Linesep schema.
func (ls *Linesep) Marshal() ([]byte, error) {
	return avro.Marshal(schemaLinesep, ls)
}
// Comment is the Avro record "social.yarn.lextwt.comment": the comment text
// plus an optional key/value pair.
type Comment struct {
	Comment string `avro:"comment"`
	Key     string `avro:"key"`
	Value   string `avro:"value"`
}

// schemaComment is the parsed Avro schema backing Comment.
var schemaComment = avro.MustParse(`{"name":"social.yarn.lextwt.comment","type":"record","fields":[{"name":"comment","type":"string"},{"name":"key","type":"string"},{"name":"value","type":"string"}]}`)

// Schema reports the Avro schema that describes Comment.
func (c *Comment) Schema() avro.Schema {
	return schemaComment
}

// Unmarshal decodes b into the receiver using the Comment schema.
func (c *Comment) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaComment, b, c)
}

// Marshal encodes the receiver using the Comment schema.
func (c *Comment) Marshal() ([]byte, error) {
	return avro.Marshal(schemaComment, c)
}
// Mention is the Avro record "social.yarn.lextwt.mention", carrying a name, a
// domain and a target.
type Mention struct {
	Name   string `avro:"name"`
	Domain string `avro:"domain"`
	Target string `avro:"target"`
}

// schemaMention is the parsed Avro schema backing Mention.
var schemaMention = avro.MustParse(`{"name":"social.yarn.lextwt.mention","type":"record","fields":[{"name":"name","type":"string"},{"name":"domain","type":"string"},{"name":"target","type":"string"}]}`)

// Schema reports the Avro schema that describes Mention.
func (m *Mention) Schema() avro.Schema {
	return schemaMention
}

// Unmarshal decodes b into the receiver using the Mention schema.
func (m *Mention) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaMention, b, m)
}

// Marshal encodes the receiver using the Mention schema.
func (m *Mention) Marshal() ([]byte, error) {
	return avro.Marshal(schemaMention, m)
}
// Bangmention is the Avro record "social.yarn.lextwt.bangmention", carrying a
// name and a target.
type Bangmention struct {
	Name   string `avro:"name"`
	Target string `avro:"target"`
}

// schemaBangmention is the parsed Avro schema backing Bangmention.
var schemaBangmention = avro.MustParse(`{"name":"social.yarn.lextwt.bangmention","type":"record","fields":[{"name":"name","type":"string"},{"name":"target","type":"string"}]}`)

// Schema reports the Avro schema that describes Bangmention.
func (bm *Bangmention) Schema() avro.Schema {
	return schemaBangmention
}

// Unmarshal decodes b into the receiver using the Bangmention schema.
func (bm *Bangmention) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaBangmention, b, bm)
}

// Marshal encodes the receiver using the Bangmention schema.
func (bm *Bangmention) Marshal() ([]byte, error) {
	return avro.Marshal(schemaBangmention, bm)
}
// Hashtag is the Avro record "social.yarn.lextwt.hashtag", carrying the tag
// text and its target.
type Hashtag struct {
	Tag    string `avro:"tag"`
	Target string `avro:"target"`
}

// schemaHashtag is the parsed Avro schema backing Hashtag.
var schemaHashtag = avro.MustParse(`{"name":"social.yarn.lextwt.hashtag","type":"record","fields":[{"name":"tag","type":"string"},{"name":"target","type":"string"}]}`)

// Schema reports the Avro schema that describes Hashtag.
func (h *Hashtag) Schema() avro.Schema {
	return schemaHashtag
}

// Unmarshal decodes b into the receiver using the Hashtag schema.
func (h *Hashtag) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaHashtag, b, h)
}

// Marshal encodes the receiver using the Hashtag schema.
func (h *Hashtag) Marshal() ([]byte, error) {
	return avro.Marshal(schemaHashtag, h)
}
// Subject is the Avro record "social.yarn.lextwt.subject": a subject string
// together with the text and target of its tag.
type Subject struct {
	Subject string `avro:"subject"`
	Tag     string `avro:"tag"`
	Target  string `avro:"target"`
}

// schemaSubject is the parsed Avro schema backing Subject.
var schemaSubject = avro.MustParse(`{"name":"social.yarn.lextwt.subject","type":"record","fields":[{"name":"subject","type":"string"},{"name":"tag","type":"string"},{"name":"target","type":"string"}]}`)

// Schema reports the Avro schema that describes Subject.
func (s *Subject) Schema() avro.Schema {
	return schemaSubject
}

// Unmarshal decodes b into the receiver using the Subject schema.
func (s *Subject) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaSubject, b, s)
}

// Marshal encodes the receiver using the Subject schema.
func (s *Subject) Marshal() ([]byte, error) {
	return avro.Marshal(schemaSubject, s)
}
// Link is the Avro record "social.yarn.lextwt.link": a numeric link type plus
// the link's text, target and title.
type Link struct {
	LinkType int    `avro:"linkType"`
	Text     string `avro:"text"`
	Target   string `avro:"target"`
	Title    string `avro:"title"`
}

// schemaLink is the parsed Avro schema backing Link.
var schemaLink = avro.MustParse(`{"name":"social.yarn.lextwt.link","type":"record","fields":[{"name":"linkType","type":"int"},{"name":"text","type":"string"},{"name":"target","type":"string"},{"name":"title","type":"string"}]}`)

// Schema reports the Avro schema that describes Link.
func (ln *Link) Schema() avro.Schema {
	return schemaLink
}

// Unmarshal decodes b into the receiver using the Link schema.
func (ln *Link) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaLink, b, ln)
}

// Marshal encodes the receiver using the Link schema.
func (ln *Link) Marshal() ([]byte, error) {
	return avro.Marshal(schemaLink, ln)
}
// Code is the Avro record "social.yarn.lextwt.code": the code text and a
// numeric code type.
type Code struct {
	Code     string `avro:"code"`
	Codetype int    `avro:"codetype"`
}

// schemaCode is the parsed Avro schema backing Code.
var schemaCode = avro.MustParse(`{"name":"social.yarn.lextwt.code","type":"record","fields":[{"name":"code","type":"string"},{"name":"codetype","type":"int"}]}`)

// Schema reports the Avro schema that describes Code.
func (c *Code) Schema() avro.Schema {
	return schemaCode
}

// Unmarshal decodes b into the receiver using the Code schema.
func (c *Code) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaCode, b, c)
}

// Marshal encodes the receiver using the Code schema.
func (c *Code) Marshal() ([]byte, error) {
	return avro.Marshal(schemaCode, c)
}
// Lextwt is the Avro record "social.yarn.lextwt.lextwt": one twt with its
// author, creation time and message elements.
type Lextwt struct {
	Nick string `avro:"nick"`
	URI  string `avro:"uri"`
	// Created is an Avro long; the helpers in this package store Unix
	// milliseconds in it.
	Created       int64  `avro:"created"`
	CreatedZone   string `avro:"createdZone"`
	CreatedOffset int    `avro:"createdOffset"`
	// Msg is an array whose items are a union of null, string and the
	// social.yarn.lextwt.* element records.
	Msg []any `avro:"msg"`
}

// schemaLextwt is the parsed Avro schema backing Lextwt.
var schemaLextwt = avro.MustParse(`{"name":"social.yarn.lextwt.lextwt","type":"record","fields":[{"name":"nick","type":"string"},{"name":"uri","type":"string"},{"name":"created","type":"long"},{"name":"createdZone","type":"string"},{"name":"createdOffset","type":"int"},{"name":"msg","type":{"type":"array","items":["null","string",{"name":"social.yarn.lextwt.linesep","type":"record","fields":[]},{"name":"social.yarn.lextwt.comment","type":"record","fields":[{"name":"comment","type":"string"},{"name":"key","type":"string"},{"name":"value","type":"string"}]},{"name":"social.yarn.lextwt.mention","type":"record","fields":[{"name":"name","type":"string"},{"name":"domain","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.bangmention","type":"record","fields":[{"name":"name","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.hashtag","type":"record","fields":[{"name":"tag","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.subject","type":"record","fields":[{"name":"subject","type":"string"},{"name":"tag","type":"string"},{"name":"target","type":"string"}]},{"name":"social.yarn.lextwt.link","type":"record","fields":[{"name":"linkType","type":"int"},{"name":"text","type":"string"},{"name":"target","type":"string"},{"name":"title","type":"string"}]},{"name":"social.yarn.lextwt.code","type":"record","fields":[{"name":"code","type":"string"},{"name":"codetype","type":"int"}]}]}}]}`)

// Schema reports the Avro schema that describes Lextwt.
func (lx *Lextwt) Schema() avro.Schema {
	return schemaLextwt
}

// Unmarshal decodes b into the receiver using the Lextwt schema.
func (lx *Lextwt) Unmarshal(b []byte) error {
	return avro.Unmarshal(schemaLextwt, b, lx)
}

// Marshal encodes the receiver using the Lextwt schema.
func (lx *Lextwt) Marshal() ([]byte, error) {
	return avro.Marshal(schemaLextwt, lx)
}

215
twt-avro/helper.go Normal file
View File

@@ -0,0 +1,215 @@
package twt_avro
import (
"time"
"github.com/hamba/avro"
"go.yarn.social/lextwt"
"go.yarn.social/types"
)
// Avro type names for the message elements. Each key* value below, except
// keyText, is the full name of a record in the generated schemas; Elem uses
// them as the single key of the map wrapping a record, and ToLextwt looks
// decoded elements up by the same names.
const (
keySubject = "social.yarn.lextwt.subject"
keyCode = "social.yarn.lextwt.code"
keyMention = "social.yarn.lextwt.mention"
keyHashtag = "social.yarn.lextwt.hashtag"
keyLink = "social.yarn.lextwt.link"
keyBangmention = "social.yarn.lextwt.bangmention"
keyLinesep = "social.yarn.lextwt.linesep"
keyComment = "social.yarn.lextwt.comment"
// keyText is only passed to Register; it is not a record name in the
// generated schemas, where plain text is the union's "string" branch.
keyText = "social.yarn.lextwt.text"
)
func Register() {
avro.Register(keyText, "")
avro.Register(keySubject, Subject{})
avro.Register(keyCode, Code{})
avro.Register(keyMention, Mention{})
avro.Register(keyHashtag, Hashtag{})
avro.Register(keyLink, Link{})
avro.Register(keyBangmention, Bangmention{})
avro.Register(keyLinesep, Linesep{})
avro.Register(keyComment, Comment{})
}
// Elem prepares a value for use as an entry of Lextwt.Msg.
//
// A string is returned unchanged. Any other value is wrapped in a single-entry
// map keyed by its Avro type name; values that are not one of the element
// structs (passed by value) are keyed "unknown".
func Elem(o any) any {
	var name string
	switch o.(type) {
	case string:
		return o
	case Subject:
		name = keySubject
	case Code:
		name = keyCode
	case Mention:
		name = keyMention
	case Hashtag:
		name = keyHashtag
	case Link:
		name = keyLink
	case Bangmention:
		name = keyBangmention
	case Linesep:
		name = keyLinesep
	case Comment:
		name = keyComment
	default:
		name = "unknown"
	}
	return map[string]any{name: o}
}
// Msg gathers its arguments into a slice suitable for Lextwt.Msg. The
// arguments are returned as given, with no wrapping or copying; calling it
// with no arguments yields a nil slice.
func Msg(items ...any) []any {
	return items
}
// FromTwt converts a types.Twt into its Avro record form.
//
// A nil twt yields the zero Lextwt, matching how FromLextwt treats a nil
// pointer. Any other value must be a *lextwt.Twt: a different implementation
// of types.Twt makes the type assertion panic.
func FromTwt(twt types.Twt) Lextwt {
	// Guard the nil interface explicitly; asserting on it would panic.
	if twt == nil {
		return Lextwt{}
	}
	return FromLextwt(twt.(*lextwt.Twt))
}
// FromLextwt converts a parsed twt into its Avro record form.
//
// A nil twt yields the zero Lextwt. The creation time is stored as Unix
// milliseconds plus the zone offset in seconds; the zone name is left empty.
// Each element of the twt is mapped to the matching record (or to a plain
// string for text) and wrapped with Elem. Nil elements and element types not
// listed below are skipped.
//
// NOTE(review): Code records are written with only the code text; Codetype is
// left at zero.
func FromLextwt(twt *lextwt.Twt) Lextwt {
	if twt == nil {
		return Lextwt{}
	}

	created := twt.Created()
	_, offset := created.Zone()
	twter := twt.Twter()

	l := Lextwt{
		Nick:          twter.Nick,
		URI:           twter.URI,
		Created:       created.UnixMilli(),
		CreatedZone:   "",
		CreatedOffset: offset,
	}

	add := func(v any) { l.Msg = append(l.Msg, Elem(v)) }

	for _, elem := range twt.Elems() {
		// The separator is a shared sentinel value, so it is matched by
		// identity before looking at concrete types.
		switch {
		case elem == nil:
			continue
		case elem == lextwt.LineSeparator:
			add(Linesep{})
			continue
		}

		switch v := elem.(type) {
		case *lextwt.Subject:
			add(Subject{Subject: v.Subject(), Tag: v.Tag().Text(), Target: v.Tag().Target()})
		case *lextwt.Code:
			add(Code{Code: v.Text()})
		case *lextwt.Mention:
			add(Mention{Name: v.Name(), Domain: v.Domain(), Target: v.Target()})
		case *lextwt.Tag:
			add(Hashtag{Tag: v.Text(), Target: v.Target()})
		case *lextwt.Link:
			add(Link{LinkType: int(v.LinkType()), Text: v.Text(), Target: v.Target(), Title: v.Title()})
		case *lextwt.BangMention:
			add(Bangmention{Name: v.Name(), Target: v.Target()})
		case *lextwt.Comment:
			add(Comment{Comment: v.Text(), Key: v.Key(), Value: v.Value()})
		case *lextwt.Text:
			add(v.Literal())
		}
	}
	return l
}
// ToTwt rebuilds the twt via ToLextwt and returns it as the types.Twt
// interface.
func (lx Lextwt) ToTwt() types.Twt {
	twt := lx.ToLextwt()
	return twt
}
// ToLextwt rebuilds a *lextwt.Twt from the record.
//
// The timestamp is restored from Created (Unix milliseconds) and placed in a
// fixed zone built from CreatedZone and CreatedOffset; its literal form is the
// RFC 3339 rendering of that time.
//
// Each entry of Msg is expected to be either a plain string (as Elem produces
// for text) or a single-key map whose key is the Avro type name ("string" or
// one of the social.yarn.lextwt.* record names) and whose value is the text or
// a map of the record's fields — presumably the shape the decoder yields for
// the "msg" union; confirm against the avro library in use. Entries of any
// other shape are skipped. Missing or mistyped record fields read as their
// zero value.
//
// The link title and the mention domain are not carried over to the rebuilt
// elements.
func (lx Lextwt) ToLextwt() *lextwt.Twt {
	twter := types.Twter{
		Nick: lx.Nick,
		URI:  lx.URI,
	}

	// time.FixedZone never returns nil, so the zone is applied unconditionally.
	ts := time.UnixMilli(lx.Created).In(time.FixedZone(lx.CreatedZone, lx.CreatedOffset))
	dt := lextwt.NewDateTime(ts, ts.Format(time.RFC3339))

	elems := make([]lextwt.Elem, 0, len(lx.Msg))
	for _, item := range lx.Msg {
		var union map[string]any
		switch v := item.(type) {
		case string:
			// Text that never went through an encode/decode round trip.
			elems = append(elems, lextwt.NewText(v))
			continue
		case map[string]any:
			union = v
		default:
			continue
		}

		if text, ok := union["string"].(string); ok {
			elems = append(elems, lextwt.NewText(text))
		}
		if rec, ok := union[keySubject].(map[string]any); ok {
			subject := read[string](rec, "subject")
			tag := read[string](rec, "tag")
			target := read[string](rec, "target")
			if subject == "" {
				elems = append(elems, lextwt.NewSubjectTag(tag, target))
			} else {
				elems = append(elems, lextwt.NewSubject(subject))
			}
		}
		if rec, ok := union[keyCode].(map[string]any); ok {
			code := read[string](rec, "code")
			// The schema declares codetype as an Avro int, read here as a Go
			// int exactly like linkType below; asserting int8 never matched
			// and always produced zero.
			codeType := lextwt.CodeType(read[int](rec, "codetype"))
			elems = append(elems, lextwt.NewCode(code, codeType))
		}
		if rec, ok := union[keyMention].(map[string]any); ok {
			name := read[string](rec, "name")
			target := read[string](rec, "target")
			elems = append(elems, lextwt.NewMention(name, target))
		}
		if rec, ok := union[keyHashtag].(map[string]any); ok {
			tag := read[string](rec, "tag")
			target := read[string](rec, "target")
			elems = append(elems, lextwt.NewTag(tag, target))
		}
		if rec, ok := union[keyLink].(map[string]any); ok {
			text := read[string](rec, "text")
			target := read[string](rec, "target")
			linkType := lextwt.LinkType(read[int](rec, "linkType"))
			elems = append(elems, lextwt.NewLink(text, target, linkType))
		}
		if rec, ok := union[keyBangmention].(map[string]any); ok {
			name := read[string](rec, "name")
			target := read[string](rec, "target")
			elems = append(elems, lextwt.NewBangMention(name, target))
		}
		if rec, ok := union[keyComment].(map[string]any); ok {
			comment := read[string](rec, "comment")
			key := read[string](rec, "key")
			value := read[string](rec, "value")
			if key != "" {
				elems = append(elems, lextwt.NewCommentValue(comment, key, value))
			} else {
				elems = append(elems, lextwt.NewComment(comment))
			}
		}
		if _, ok := union[keyLinesep].(map[string]any); ok {
			elems = append(elems, lextwt.LineSeparator)
		}
	}

	return lextwt.NewTwt(twter, dt, elems...)
}
// read looks up k in m and returns the value as a T. If the key is absent or
// the stored value is not exactly of type T, the zero value of T is returned.
func read[T any](m map[string]any, k string) T {
	// A failed comma-ok assertion already leaves the zero value of T in v.
	v, _ := m[k].(T)
	return v
}

115
twt-avro/helper_test.go Normal file
View File

@@ -0,0 +1,115 @@
package twt_avro_test
import (
"encoding/base64"
"fmt"
"log"
"testing"
"time"
"github.com/davecgh/go-spew/spew"
twt_avro "go.sour.is/lextwt-encoding/twt-avro"
"go.yarn.social/lextwt"
"go.yarn.social/types"
)
// TestMarshal round-trips a hand-built Lextwt through Marshal and Unmarshal
// and checks that the scalar fields and the number of message elements
// survive. Failures are reported through t so that other tests still run.
func TestMarshal(t *testing.T) {
	twt_avro.Register()

	ts := must(time.Parse(time.RFC3339, "2021-01-24T02:19:54Z"))
	zone, offset := ts.Zone()

	in := twt_avro.Lextwt{
		Nick:          "xuu",
		URI:           "https://xuu.txt",
		Created:       ts.UnixMilli(),
		CreatedZone:   zone,
		CreatedOffset: offset,
		Msg: twt_avro.Msg(
			twt_avro.Elem(twt_avro.Subject{
				Subject: "foobar",
			}),
			twt_avro.Elem("foo"),
			twt_avro.Elem(twt_avro.Code{
				Code: "baz",
			}),
		),
	}

	data, err := in.Marshal()
	if err != nil {
		t.Fatalf("marshal: %v", err)
	}
	t.Logf("encoded: %v", data)

	out := twt_avro.Lextwt{}
	if err := out.Unmarshal(data); err != nil {
		t.Fatalf("unmarshal: %v", err)
	}
	t.Logf("decoded: %#v", out)

	if out.Nick != in.Nick {
		t.Errorf("nick: got %q, want %q", out.Nick, in.Nick)
	}
	if out.URI != in.URI {
		t.Errorf("uri: got %q, want %q", out.URI, in.URI)
	}
	if out.Created != in.Created {
		t.Errorf("created: got %d, want %d", out.Created, in.Created)
	}
	if out.CreatedZone != in.CreatedZone {
		t.Errorf("createdZone: got %q, want %q", out.CreatedZone, in.CreatedZone)
	}
	if out.CreatedOffset != in.CreatedOffset {
		t.Errorf("createdOffset: got %d, want %d", out.CreatedOffset, in.CreatedOffset)
	}
	if len(out.Msg) != len(in.Msg) {
		t.Errorf("msg: got %d elements, want %d", len(out.Msg), len(in.Msg))
	}
}
// must unwraps a (value, error) pair for test fixtures that are not expected
// to fail. It panics with err when err is non-nil, so a broken fixture cannot
// go unnoticed as a zero value.
func must[T any](t T, err error) T {
	if err != nil {
		panic(err)
	}
	return t
}
// TestFromTwt parses twt lines, converts them to the Avro record, round-trips
// the record through Marshal/Unmarshal, rebuilds the twt and checks that its
// "%+l" rendering equals the original line.
//
// NOTE(review): the setup failures below use log.Fatal, which terminates the
// whole test binary; consider t.Fatal once the file's log import can go.
func TestFromTwt(t *testing.T) {
	twt_avro.Register()

	type testCase struct {
		in    string
		twter *types.Twter
	}
	cases := []testCase{
		{
			in:    "2021-01-24T02:19:54-07:00 (#asdf1234) @xuu This is a `twt`!\u2028Next Line!",
			twter: &types.Twter{Nick: "xuu@sour.is", URI: "https://xuu.txt"},
		},
		{
			in:    "2021-01-24T02:19:54-07:00 !xuu <https://xuu.txt> #lol [lang=en]",
			twter: &types.Twter{Nick: "xuu@sour.is", URI: "https://xuu.txt"},
		},
	}

	for _, tc := range cases {
		parsed, err := lextwt.ParseLine(tc.in, tc.twter)
		if err != nil {
			log.Fatal(err)
		}

		record := twt_avro.FromTwt(parsed)
		encoded, err := record.Marshal()
		if err != nil {
			log.Fatal(err)
		}
		t.Log(enc(encoded))

		var decoded twt_avro.Lextwt
		if err := decoded.Unmarshal(encoded); err != nil {
			log.Fatal(err)
		}
		t.Log(spew.Sdump(decoded))

		rebuilt := decoded.ToLextwt()
		rendered := fmt.Sprintf("%+l", rebuilt)
		t.Log(rendered)
		if rendered != tc.in {
			t.Errorf("\nexpected %s\n got %s", tc.in, rendered)
		}
		t.Log(spew.Sdump(rebuilt.Elems()))
	}
}
// enc renders b as unpadded standard base64 so encoded records can be logged
// compactly.
func enc(b []byte) string {
	return base64.RawStdEncoding.EncodeToString(b)
}