chore: many fixes to code
This commit is contained in:
333
feed.go
333
feed.go
@@ -4,7 +4,9 @@ import (
|
||||
"cmp"
|
||||
"context"
|
||||
"database/sql"
|
||||
"database/sql/driver"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"iter"
|
||||
"net/http"
|
||||
"net/url"
|
||||
@@ -14,6 +16,7 @@ import (
|
||||
|
||||
_ "embed"
|
||||
|
||||
"github.com/oklog/ulid/v2"
|
||||
"go.sour.is/xt/internal/otel"
|
||||
"go.yarn.social/lextwt"
|
||||
"go.yarn.social/types"
|
||||
@@ -21,22 +24,22 @@ import (
|
||||
|
||||
type Feed struct {
|
||||
FeedID uuid
|
||||
FetchURI string
|
||||
ParentID uuid
|
||||
HashURI string
|
||||
URI string
|
||||
Nick string
|
||||
LastScanOn sql.NullTime
|
||||
State State
|
||||
LastScanOn TwtTime
|
||||
RefreshRate int
|
||||
NextScanOn TwtTime
|
||||
|
||||
LastModified sql.NullTime
|
||||
LastModified TwtTime
|
||||
LastError sql.NullString
|
||||
ETag sql.NullString
|
||||
|
||||
Version string
|
||||
DiscloseFeedURL string
|
||||
DiscloseNick string
|
||||
FirstFetch bool
|
||||
|
||||
State State
|
||||
}
|
||||
|
||||
type State string
|
||||
@@ -47,43 +50,34 @@ const (
|
||||
Cold State = "cold"
|
||||
Warm State = "warm"
|
||||
Hot State = "hot"
|
||||
Once State = "once"
|
||||
)
|
||||
|
||||
var (
|
||||
//go:embed init.sql
|
||||
initSQL string
|
||||
|
||||
insertFeed = `
|
||||
insert into feeds
|
||||
(feed_id, uri, nick, last_scan_on, refresh_rate)
|
||||
values (?, ?, ?, ?, ?)
|
||||
ON CONFLICT (feed_id) DO NOTHING
|
||||
`
|
||||
|
||||
insertTwt = `
|
||||
insert into twts
|
||||
(feed_id, hash, conv, dt, text, mentions, tags)
|
||||
values (?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT (feed_id, hash) DO NOTHING
|
||||
`
|
||||
|
||||
fetchFeeds = `
|
||||
select
|
||||
feed_id,
|
||||
uri,
|
||||
nick,
|
||||
last_scan_on,
|
||||
refresh_rate,
|
||||
last_modified_on,
|
||||
last_etag
|
||||
from feeds
|
||||
where datetime(
|
||||
coalesce(last_scan_on, '1901-01-01'),
|
||||
'+'||refresh_rate||' seconds'
|
||||
) < datetime(current_timestamp, '+10 minutes')
|
||||
`
|
||||
insertFeed = func(r int) (string, int) {
|
||||
repeat := ""
|
||||
if r > 1 {
|
||||
repeat = strings.Repeat(", (?, ?, ?, ?, ?, ?, ?)", r-1)
|
||||
}
|
||||
return `
|
||||
insert into feeds (
|
||||
feed_id,
|
||||
parent_id,
|
||||
nick,
|
||||
uri,
|
||||
state,
|
||||
last_scan_on,
|
||||
refresh_rate
|
||||
)
|
||||
values (?, ?, ?, ?, ?, ?, ?)` + repeat + `
|
||||
ON CONFLICT (feed_id) DO NOTHING`, r * 7
|
||||
}
|
||||
updateFeed = `
|
||||
update feeds set
|
||||
update feeds set
|
||||
state = ?,
|
||||
last_scan_on = ?,
|
||||
refresh_rate = ?,
|
||||
last_modified_on = ?,
|
||||
@@ -91,21 +85,83 @@ var (
|
||||
last_error = ?
|
||||
where feed_id = ?
|
||||
`
|
||||
|
||||
// insertTwt builds a multi-row insert for the twts table.
//
// r is the number of rows the statement should accept. The returned string
// is the SQL text and the returned int is the total number of bind
// parameters it expects (seven per row).
//
// The column order must match the order in which storeFeed appends values:
// feed_id, ulid, text, conv, hash, mentions, tags. The earlier column list
// had hash and conv swapped, which stored the conversation subject in the
// hash column and the twt hash in the conv column.
insertTwt = func(r int) (string, int) {
	repeat := ""
	if r > 1 {
		// One extra placeholder tuple for every row after the first.
		repeat = strings.Repeat(", (?, ?, ?, ?, ?, ?, ?)", r-1)
	}
	return `
		insert into twts
			(feed_id, ulid, text, conv, hash, mentions, tags)
		values (?, ?, ?, ?, ?, ?, ?)` + repeat + `
		ON CONFLICT (feed_id, ulid) DO NOTHING`, r * 7
}
|
||||
|
||||
fetchFeeds = `
|
||||
select
|
||||
feed_id,
|
||||
parent_id,
|
||||
coalesce(hashing_uri, uri) hash_uri,
|
||||
uri,
|
||||
nick,
|
||||
state,
|
||||
last_scan_on,
|
||||
strftime(
|
||||
'%Y-%m-%dT%H:%M:%fZ',
|
||||
coalesce(last_scan_on, '1901-01-01'),
|
||||
'+'||refresh_rate||' seconds'
|
||||
) next_scan_on,
|
||||
refresh_rate,
|
||||
last_modified_on,
|
||||
last_etag
|
||||
from feeds
|
||||
left join (
|
||||
select
|
||||
feed_id parent_id,
|
||||
uri hashing_uri
|
||||
from feeds
|
||||
where parent_id is null
|
||||
) using (parent_id)
|
||||
where datetime(
|
||||
coalesce(last_scan_on, '1901-01-01'),
|
||||
'+'||refresh_rate||' seconds'
|
||||
) < datetime(current_timestamp, '+10 minutes')
|
||||
`
|
||||
)
|
||||
|
||||
func (f *Feed) Save(ctx context.Context, db *sql.DB) error {
|
||||
func (f *Feed) Create(ctx context.Context, db db) error {
|
||||
ctx, span := otel.Span(ctx)
|
||||
defer span.End()
|
||||
query, _ := insertFeed(1)
|
||||
_, err := db.ExecContext(
|
||||
ctx,
|
||||
query,
|
||||
f.FeedID, // feed_id
|
||||
f.ParentID, // parent_id
|
||||
f.Nick, // nick
|
||||
f.URI, // uri
|
||||
f.State, // state
|
||||
f.LastScanOn, // last_scan_on
|
||||
f.RefreshRate, // refresh_rate
|
||||
)
|
||||
return err
|
||||
}
|
||||
|
||||
func (f *Feed) Save(ctx context.Context, db db) error {
|
||||
ctx, span := otel.Span(ctx)
|
||||
defer span.End()
|
||||
|
||||
_, err := db.ExecContext(
|
||||
ctx,
|
||||
updateFeed,
|
||||
f.LastScanOn,
|
||||
f.RefreshRate,
|
||||
f.LastModified,
|
||||
f.ETag,
|
||||
f.LastError,
|
||||
f.FeedID,
|
||||
f.State, // state
|
||||
f.LastScanOn, // last_scan_on
|
||||
f.RefreshRate, // refresh_rate
|
||||
f.LastModified, // last_modified_on
|
||||
f.ETag, // last_etag
|
||||
f.LastError, // last_error
|
||||
f.FeedID, // feed_id
|
||||
)
|
||||
return err
|
||||
}
|
||||
@@ -117,9 +173,13 @@ func (f *Feed) Scan(res interface{ Scan(...any) error }) error {
|
||||
f.Version = "0.0.1"
|
||||
err = res.Scan(
|
||||
&f.FeedID,
|
||||
&f.ParentID,
|
||||
&f.HashURI,
|
||||
&f.URI,
|
||||
&f.Nick,
|
||||
&f.State,
|
||||
&f.LastScanOn,
|
||||
&f.NextScanOn,
|
||||
&f.RefreshRate,
|
||||
&f.LastModified,
|
||||
&f.ETag,
|
||||
@@ -128,19 +188,10 @@ func (f *Feed) Scan(res interface{ Scan(...any) error }) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if !f.LastScanOn.Valid {
|
||||
f.FirstFetch = true
|
||||
f.LastScanOn.Time = time.Now()
|
||||
f.LastScanOn.Valid = true
|
||||
} else {
|
||||
f.LastScanOn.Time = f.LastScanOn.Time.Add(time.Duration(f.RefreshRate) * time.Second)
|
||||
}
|
||||
|
||||
f.FetchURI = f.URI
|
||||
return err
|
||||
}
|
||||
|
||||
func loadFeeds(ctx context.Context, db *sql.DB) (iter.Seq[Feed], error) {
|
||||
func loadFeeds(ctx context.Context, db db) (iter.Seq[Feed], error) {
|
||||
ctx, span := otel.Span(ctx)
|
||||
|
||||
var err error
|
||||
@@ -159,6 +210,7 @@ func loadFeeds(ctx context.Context, db *sql.DB) (iter.Seq[Feed], error) {
|
||||
var f Feed
|
||||
err = f.Scan(res)
|
||||
if err != nil {
|
||||
span.RecordError(err)
|
||||
return
|
||||
}
|
||||
if !yield(f) {
|
||||
@@ -168,7 +220,7 @@ func loadFeeds(ctx context.Context, db *sql.DB) (iter.Seq[Feed], error) {
|
||||
}, err
|
||||
}
|
||||
|
||||
func storeFeed(ctx context.Context, db *sql.DB, f types.TwtFile) error {
|
||||
func storeFeed(ctx context.Context, db db, f types.TwtFile) error {
|
||||
ctx, span := otel.Span(ctx)
|
||||
defer span.End()
|
||||
|
||||
@@ -201,20 +253,11 @@ func storeFeed(ctx context.Context, db *sql.DB, f types.TwtFile) error {
|
||||
|
||||
defer tx.Rollback()
|
||||
|
||||
_, err = tx.ExecContext(
|
||||
ctx,
|
||||
insertFeed,
|
||||
feedID,
|
||||
f.Twter().HashingURI,
|
||||
f.Twter().DomainNick(),
|
||||
loadTS,
|
||||
refreshRate,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
twts := f.Twts()
|
||||
_, size := insertTwt(len(twts))
|
||||
args := make([]any, 0, size)
|
||||
|
||||
for _, twt := range f.Twts() {
|
||||
for _, twt := range twts {
|
||||
mentions := make(uuids, 0, len(twt.Mentions()))
|
||||
for _, mention := range twt.Mentions() {
|
||||
followMap[mention.Twter().Nick] = mention.Twter().URI
|
||||
@@ -233,32 +276,76 @@ func storeFeed(ctx context.Context, db *sql.DB, f types.TwtFile) error {
|
||||
subjectTag = tag.Text()
|
||||
}
|
||||
}
|
||||
args = append(
|
||||
args,
|
||||
feedID, // feed_id
|
||||
makeULID(twt), // ulid
|
||||
fmt.Sprintf("%+l", twt), // text
|
||||
subjectTag, // conv
|
||||
twt.Hash(), // hash
|
||||
mentions.ToStrList(), // mentions
|
||||
tags, // tags
|
||||
)
|
||||
}
|
||||
for query, args := range chunk(args, insertTwt, db.MaxVariableNumber) {
|
||||
fmt.Println("store", f.Twter().URI, len(args))
|
||||
|
||||
_, err = tx.ExecContext(
|
||||
ctx,
|
||||
insertTwt,
|
||||
feedID,
|
||||
twt.Hash(),
|
||||
subjectTag,
|
||||
twt.Created(),
|
||||
fmt.Sprint(twt),
|
||||
mentions.ToStrList(),
|
||||
tags,
|
||||
query,
|
||||
args...,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
args = args[:0]
|
||||
args = append(args,
|
||||
feedID, // feed_id
|
||||
nil, // parent_id
|
||||
f.Twter().DomainNick(), // nick
|
||||
f.Twter().URI, // uri
|
||||
"warm", // state
|
||||
TwtTime{Time: loadTS, Valid: true}, // last_scan_on
|
||||
refreshRate, // refresh_rate
|
||||
)
|
||||
|
||||
if prev, ok := f.Info().GetN("prev", 0); ok {
|
||||
_, part, ok := strings.Cut(prev.Value(), " ")
|
||||
if ok {
|
||||
uri:= f.Twter().URI
|
||||
part = uri[:strings.LastIndex(uri, "/")+1] + part
|
||||
childID := urlNS.UUID5(part)
|
||||
|
||||
args = append(args,
|
||||
childID, // feed_id
|
||||
feedID, // parent_id
|
||||
f.Twter().DomainNick(), // nick
|
||||
part, // uri
|
||||
"once", // state
|
||||
nil, // last_scan_on
|
||||
refreshRate, // refresh_rate
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
for nick, uri := range followMap {
|
||||
args = append(args,
|
||||
urlNS.UUID5(uri), // feed_id
|
||||
nil, // parent_id
|
||||
nick, // nick
|
||||
uri, // uri
|
||||
"warm", // state
|
||||
nil, // last_scan_on
|
||||
refreshRate, // refresh_rate
|
||||
)
|
||||
}
|
||||
for query, args := range chunk(args, insertFeed, db.MaxVariableNumber) {
|
||||
_, err = tx.ExecContext(
|
||||
ctx,
|
||||
insertFeed,
|
||||
urlNS.UUID5(uri),
|
||||
uri,
|
||||
nick,
|
||||
nil,
|
||||
refreshRate,
|
||||
query,
|
||||
args...,
|
||||
)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -270,11 +357,11 @@ func storeFeed(ctx context.Context, db *sql.DB, f types.TwtFile) error {
|
||||
|
||||
func (feed *Feed) MakeHTTPRequest(ctx context.Context) (*http.Request, error) {
|
||||
feed.State = "fetch"
|
||||
if strings.Contains(feed.FetchURI, "lublin.se") {
|
||||
if strings.Contains(feed.URI, "lublin.se") {
|
||||
return nil, fmt.Errorf("%w: permaban: %s", ErrPermanentlyDead, feed.URI)
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", feed.FetchURI, nil)
|
||||
req, err := http.NewRequestWithContext(ctx, "GET", feed.URI, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating HTTP request failed: %w", err)
|
||||
}
|
||||
@@ -298,3 +385,83 @@ func (feed *Feed) MakeHTTPRequest(ctx context.Context) (*http.Request, error) {
|
||||
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// TwtTime is a nullable timestamp that is written to the database as an
// RFC 3339 string (see Value) and read back from either a string, a byte
// slice or a time.Time (see Scan).
type TwtTime struct {
	Time  time.Time
	Valid bool // Valid is true if Time is not NULL
}

// Scan implements the [sql.Scanner] interface.
//
// A nil value resets n to the NULL state. A time.Time is stored directly.
// A string or []byte is parsed as RFC 3339. On any failure (unparsable text
// or an unsupported dynamic type) n is reset to the NULL state and an error
// is returned, so Valid is never true alongside a meaningless Time.
func (n *TwtTime) Scan(value any) error {
	switch value := value.(type) {
	case nil:
		n.Time, n.Valid = time.Time{}, false
		return nil
	case time.Time:
		n.Time, n.Valid = value, true
		return nil
	case string:
		return n.parse(value)
	case []byte:
		// Text columns are commonly delivered as raw bytes by SQL drivers.
		return n.parse(string(value))
	default:
		n.Time, n.Valid = time.Time{}, false
		return fmt.Errorf("scanning TwtTime: unsupported type %T", value)
	}
}

// parse sets n from an RFC 3339 timestamp, leaving n in the NULL state when
// s cannot be parsed.
func (n *TwtTime) parse(s string) error {
	t, err := time.Parse(time.RFC3339, s)
	if err != nil {
		n.Time, n.Valid = time.Time{}, false
		return fmt.Errorf("scanning TwtTime: %w", err)
	}
	n.Time, n.Valid = t, true
	return nil
}

// Value implements the [driver.Valuer] interface.
//
// It returns nil for the NULL state and otherwise the time formatted as an
// RFC 3339 string.
func (n TwtTime) Value() (driver.Value, error) {
	if !n.Valid {
		return nil, nil
	}
	return n.Time.Format(time.RFC3339), nil
}
|
||||
|
||||
func makeULID(twt types.Twt) ulid.ULID {
|
||||
h64 := fnv.New64a()
|
||||
h16 := fnv.New32a()
|
||||
text := []byte(fmt.Sprintf("%+l", twt))
|
||||
b := make([]byte, 10)
|
||||
copy(b, h16.Sum(text)[:2])
|
||||
copy(b[2:], h64.Sum(text))
|
||||
u := ulid.ULID{}
|
||||
u.SetTime(ulid.Timestamp(twt.Created()))
|
||||
u.SetEntropy(b)
|
||||
|
||||
return u
|
||||
}
|
||||
|
||||
func chunk(args []any, qry func(int) (string, int), maxArgs int) iter.Seq2[string, []any] {
|
||||
_, size := qry(1)
|
||||
itemsPerIter := maxArgs / size
|
||||
|
||||
if len(args) < size {
|
||||
return func(yield func(string, []any) bool) {}
|
||||
}
|
||||
|
||||
if len(args) < maxArgs {
|
||||
return func(yield func(string, []any) bool) {
|
||||
query, _ := qry(len(args) / size)
|
||||
yield(query, args)
|
||||
}
|
||||
}
|
||||
|
||||
return func(yield func(string, []any) bool) {
|
||||
for len(args) > 0 {
|
||||
if len(args) > maxArgs {
|
||||
query, size := qry(itemsPerIter)
|
||||
if !yield(query, args[:size]) {
|
||||
return
|
||||
}
|
||||
args = args[size:]
|
||||
continue
|
||||
}
|
||||
|
||||
query, _ := qry(len(args) / size)
|
||||
yield(query, args)
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user