chore: fixes and import

This commit is contained in:
xuu
2025-03-29 17:09:18 -06:00
parent 84c3099be6
commit dab5a115cf
7 changed files with 254 additions and 119 deletions

179
feed.go
View File

@@ -7,6 +7,7 @@ import (
"database/sql/driver"
"fmt"
"hash/fnv"
"io"
"iter"
"net/http"
"net/url"
@@ -93,8 +94,15 @@ var (
repeat = strings.Repeat(", (?, ?, ?, ?, ?, ?, ?)", r-1)
}
return `
insert into twts
(feed_id, ulid, text, hash, conv, mentions, tags)
insert into twts (
feed_id,
ulid,
text,
hash,
conv,
mentions,
tags
)
values (?, ?, ?, ?, ?, ?, ?)` + repeat + `
ON CONFLICT (feed_id, ulid) DO NOTHING`, r * 7
}
@@ -140,24 +148,6 @@ var (
}
)
// Create executes the single-row statement built by insertFeed(1), binding
// the fields of f in column order, and returns the error reported by
// db.ExecContext.
func (f *Feed) Create(ctx context.Context, db db) error {
	ctx, span := otel.Span(ctx)
	defer span.End()

	// Exactly one row is bound below, so the argument count that insertFeed
	// also returns is not needed here.
	stmt, _ := insertFeed(1)

	row := []any{
		f.FeedID,      // feed_id
		f.ParentID,    // parent_id
		f.Nick,        // nick
		f.URI,         // uri
		f.State,       // state
		f.LastScanOn,  // last_scan_on
		f.RefreshRate, // refresh_rate
	}

	_, err := db.ExecContext(ctx, stmt, row...)
	return err
}
func (f *Feed) Save(ctx context.Context, db db) error {
ctx, span := otel.Span(ctx)
defer span.End()
@@ -248,7 +238,7 @@ func storeFeed(ctx context.Context, db db, f types.TwtFile) error {
followers := f.Info().GetAll("follow")
followMap := make(map[string]string, len(followers))
for _, f := range f.Info().GetAll("follow") {
nick, uri, ok := strings.Cut(f.Value(), "http")
nick, uri, ok := strings.Cut(f.Value(), " ")
if !ok {
continue
}
@@ -259,7 +249,7 @@ func storeFeed(ctx context.Context, db db, f types.TwtFile) error {
continue
}
followMap[nick] = uri
followMap[uri] = nick
}
defer tx.Rollback()
@@ -269,9 +259,11 @@ func storeFeed(ctx context.Context, db db, f types.TwtFile) error {
args := make([]any, 0, size)
for _, twt := range twts {
twtID := makeULID(twt)
mentions := make(uuids, 0, len(twt.Mentions()))
for _, mention := range twt.Mentions() {
followMap[mention.Twter().Nick] = mention.Twter().URI
followMap[mention.Twter().URI] = mention.Twter().Nick
mentions = append(mentions, urlNS.UUID5(mention.Twter().URI))
}
@@ -287,10 +279,11 @@ func storeFeed(ctx context.Context, db db, f types.TwtFile) error {
subjectTag = tag.Text()
}
}
args = append(
args,
feedID, // feed_id
makeULID(twt), // ulid
twtID, // ulid
fmt.Sprintf("%+l", twt), // text
subjectTag, // conv
twt.Hash(), // hash
@@ -348,7 +341,7 @@ func storeFeed(ctx context.Context, db db, f types.TwtFile) error {
}
}
for nick, uri := range followMap {
for uri, nick := range followMap {
args = append(args,
urlNS.UUID5(uri), // feed_id
nil, // parent_id
@@ -372,6 +365,129 @@ func storeFeed(ctx context.Context, db db, f types.TwtFile) error {
return tx.Commit()
}
// storeRegistry reads registry lines from in and stores the twts they contain
// together with a feed row for every twter that was seen.
//
// Lines that are not a *lextwt.Twt are ignored. Each twt contributes one row
// of insertTwt arguments; its author and every twter it mentions are
// remembered in a uri -> nick map so that matching feed rows can be written.
// Rows are flushed in batches, each inside its own transaction, and whatever
// is still buffered when the input ends is flushed as a final batch. Once all
// batches are stored, refreshLastTwt is run.
//
// The first error returned by the database aborts the import; batches that
// were already committed stay committed.
//
// NOTE(review): uris and nicks are stored without validation. An earlier
// draft sketched skipping entries whose uri does not start with "http" or
// whose nick does; re-enable such a filter here if bad registry data shows up.
func storeRegistry(ctx context.Context, db db, in io.Reader) error {
	ctx, span := otel.Span(ctx)
	defer span.End()

	// Seven values are bound per twt and 16*1022 is a multiple of seven, so
	// the threshold is always hit exactly on a row boundary and stays below
	// the preallocated capacity of args.
	const flushAt = 16 * 1022

	twters := make(map[string]string)
	args := make([]any, 0, 1024*16)

	for line := range lextwt.IterRegistry(in) {
		twt, ok := line.(*lextwt.Twt)
		if !ok {
			continue
		}

		nick := twt.Twter().DomainNick()
		uri := twt.Twter().URI
		feedID := urlNS.UUID5(uri)
		twtID := makeULID(twt)
		text := fmt.Sprintf("%+l", twt)

		twters[uri] = nick

		mentions := make(uuids, 0, len(twt.Mentions()))
		for _, mention := range twt.Mentions() {
			mentioned := mention.Twter()
			// Register the mentioned twter so the feed_id stored in the
			// mentions column has a feed row behind it. A nick already
			// recorded for that uri (e.g. from the feed's own lines) wins
			// over the nick used in the mention.
			if _, seen := twters[mentioned.URI]; !seen {
				twters[mentioned.URI] = mentioned.Nick
			}
			mentions = append(mentions, urlNS.UUID5(mentioned.URI))
		}

		tags := make(strList, 0, len(twt.Tags()))
		for _, tag := range twt.Tags() {
			tags = append(tags, tag.Text())
		}

		subject := twt.Subject()
		subjectTag := ""
		if tag, ok := subject.Tag().(*lextwt.Tag); ok && tag != nil {
			subjectTag = tag.Text()
		}

		args = append(
			args,
			feedID,               // feed_id
			twtID,                // ulid
			text,                 // text
			twt.Hash(),           // hash
			subjectTag,           // conv
			mentions.ToStrList(), // mentions
			tags,                 // tags
		)

		if len(args) >= flushAt {
			if err := flushRegistryBatch(ctx, db, args, twters); err != nil {
				return err
			}
			// Keep the backing array for the next batch. twters is kept as
			// well, so feeds seen earlier are submitted again with every
			// flush. NOTE(review): presumably insertFeed tolerates rows that
			// already exist — confirm.
			args = args[:0]
		}
	}

	// Store the tail that never reached the batch threshold.
	if len(args) > 0 {
		if err := flushRegistryBatch(ctx, db, args, twters); err != nil {
			return err
		}
	}

	return refreshLastTwt(ctx, db)
}

// flushRegistryBatch writes the buffered twt rows in args and one feed row
// per entry of twters (uri -> nick) inside a single transaction.
//
// The backing array of args is reused to build the feed rows once the twt
// rows have been executed, so the caller must treat args as consumed.
func flushRegistryBatch(ctx context.Context, db db, args []any, twters map[string]string) error {
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return err
	}
	// Release the transaction on every error path. This mirrors storeFeed,
	// which also defers Rollback and ends with Commit.
	defer tx.Rollback()

	for query, rows := range chunk(args, insertTwt, db.MaxVariableNumber) {
		if _, err := tx.ExecContext(ctx, query, rows...); err != nil {
			return err
		}
	}

	feedArgs := args[:0]
	for uri, nick := range twters {
		feedArgs = append(feedArgs,
			urlNS.UUID5(uri), // feed_id
			nil,              // parent_id
			nick,             // nick
			uri,              // uri
			PermanentlyDead,  // state
			nil,              // last_scan_on
			TenYear,          // refresh_rate
		)
	}

	for query, rows := range chunk(feedArgs, insertFeed, db.MaxVariableNumber) {
		if _, err := tx.ExecContext(ctx, query, rows...); err != nil {
			return err
		}
	}

	return tx.Commit()
}
func (feed *Feed) MakeHTTPRequest(ctx context.Context) (*http.Request, error) {
for _, host := range permaban {
@@ -483,3 +599,16 @@ func chunk(args []any, qry func(int) (string, int), maxArgs int) iter.Seq2[strin
}
}
}
// refreshLastTwt recomputes the last_twt_on table from twts in one statement.
//
// For every feed_id it takes the part of text before the first space, formats
// it with strftime, and keeps the maximum per feed. Rows that conflict with an
// existing last_twt_on entry have their last_twt_on column overwritten.
// It returns the error reported by db.ExecContext.
func refreshLastTwt(ctx context.Context, db db) error {
	// The statement is kept verbatim; it takes no bound parameters.
	const upsertLastTwt = `
insert into last_twt_on
select
feed_id,
max(strftime('%Y-%m-%dT%H:%M:%fZ', (substring(text, 1, instr(text, ' ')-1)))) last_twt_on
from twts
group by feed_id
on conflict do update set last_twt_on = excluded.last_twt_on;
`

	if _, err := db.ExecContext(ctx, upsertLastTwt); err != nil {
		return err
	}
	return nil
}