xt/feed.go
2025-02-15 16:12:42 -07:00

285 lines
5.2 KiB
Go

package main
import (
"cmp"
"context"
"database/sql"
"fmt"
"iter"
"net/http"
"net/url"
"slices"
"strings"
"time"
_ "embed"
"go.yarn.social/lextwt"
"go.yarn.social/types"
)
// Feed is one row of the feeds table together with the transient values used
// while that feed is being fetched.
type Feed struct {
// FeedID is read from the feed_id column and is the key used by Save.
FeedID uuid
// FetchURI is the URL MakeHTTPRequest actually requests. Scan initialises
// it to URI.
FetchURI string
// URI is read from the uri column.
URI string
// Nick is read from the nick column.
Nick string
// LastScanOn is read from last_scan_on. Scan replaces it: with the current
// time when the column is NULL, otherwise with the stored time advanced by
// RefreshRate seconds.
LastScanOn sql.NullTime
// RefreshRate is a number of seconds (see Scan and the fetchFeeds query).
RefreshRate int
// LastModified is read from last_modified_on and is the value
// MakeHTTPRequest formats into the If-Modified-Since header.
LastModified sql.NullTime
// LastError is written to the last_error column by Save.
LastError sql.NullString
// ETag is read from last_etag and sent as If-None-Match when valid.
ETag sql.NullString
// Version is set to "0.0.1" by Scan and appears in the User-Agent header.
Version string
// DiscloseFeedURL and DiscloseNick are appended to the User-Agent header
// when both are non-empty.
DiscloseFeedURL string
DiscloseNick string
// FirstFetch is set by Scan when last_scan_on was NULL.
FirstFetch bool
// State is a label for the feed; Scan sets it to "load" and
// MakeHTTPRequest sets it to "fetch".
State State
}
// State is a string label stored in Feed.State.
//
// Besides the constants below, Scan and MakeHTTPRequest assign the literal
// values "load" and "fetch", which have no named constant here.
type State string
// Named feed states.
// NOTE(review): presumably ordered from least to most actively refreshed;
// confirm against the code that assigns them.
const (
// NOTE(review): the value is spelled "permanantly"; it may already be
// persisted or compared elsewhere, so confirm before correcting it.
PermanentlyDead State = "permanantly-dead"
Frozen State = "frozen"
Cold State = "cold"
Warm State = "warm"
Hot State = "hot"
)
// SQL statements used by this file.
// NOTE(review): datetime(...) and the '||' concatenation look like SQLite
// syntax; confirm against the configured driver.
var (
// initSQL holds the contents of init.sql, embedded at build time.
//go:embed init.sql
initSQL string
// insertFeed adds a feed row; an existing row with the same feed_id is
// left untouched.
insertFeed = `
insert into feeds
(feed_id, uri, nick, last_scan_on, refresh_rate)
values (?, ?, ?, ?, ?)
ON CONFLICT (feed_id) DO NOTHING
`
// insertTwt adds a twt row; an existing (feed_id, hash) pair is left
// untouched.
insertTwt = `
insert into twts
(feed_id, hash, conv, dt, text, mentions, tags)
values (?, ?, ?, ?, ?, ?, ?)
ON CONFLICT (feed_id, hash) DO NOTHING
`
// fetchFeeds selects the columns Feed.Scan expects, in the order it scans
// them, for feeds whose last_scan_on plus refresh_rate seconds falls before
// ten minutes from now.
// NOTE(review): rows with a NULL last_scan_on presumably never satisfy this
// comparison, although Feed.Scan has a branch for them; verify.
fetchFeeds = `
select
feed_id,
uri,
nick,
last_scan_on,
refresh_rate,
last_modified_on,
last_etag
from feeds
where datetime(last_scan_on, '+'||refresh_rate||' seconds') < datetime(current_timestamp, '+10 minutes')
`
// updateFeed stores the scan bookkeeping columns for one feed; parameters
// are bound in the order Feed.Save passes them.
updateFeed = `
update feeds set
last_scan_on = ?,
refresh_rate = ?,
last_modified_on = ?,
last_etag = ?,
last_error = ?
where feed_id = ?
`
)
// Save writes the feed's scan bookkeeping (last scan time, refresh rate,
// last-modified time, ETag and last error) back to its row using the
// updateFeed statement, keyed by FeedID.
//
// It also prints the fetch URI, last-modified value and last error to
// standard output before executing the update.
func (f *Feed) Save(ctx context.Context, db *sql.DB) error {
	fmt.Println(f.FetchURI, " ", f.LastModified, " ", f.LastError)

	// Argument order must match the placeholders in updateFeed.
	params := []any{
		f.LastScanOn,
		f.RefreshRate,
		f.LastModified,
		f.ETag,
		f.LastError,
		f.FeedID,
	}

	if _, err := db.ExecContext(ctx, updateFeed, params...); err != nil {
		return err
	}
	return nil
}
// Scan fills the feed from one result row. The row must provide, in order:
// feed_id, uri, nick, last_scan_on, refresh_rate, last_modified_on, last_etag.
//
// State is set to "load" and Version to "0.0.1" before the row is read, so
// both are set even when scanning fails. On success:
//   - a NULL last_scan_on marks the feed as FirstFetch and LastScanOn becomes
//     the current time;
//   - otherwise LastScanOn is advanced by RefreshRate seconds;
//   - FetchURI is initialised to URI.
//
// The error from res.Scan is returned unchanged.
func (f *Feed) Scan(res interface{ Scan(...any) error }) error {
	f.State, f.Version = "load", "0.0.1"

	// Destination order mirrors the column order of the query.
	dest := []any{
		&f.FeedID,
		&f.URI,
		&f.Nick,
		&f.LastScanOn,
		&f.RefreshRate,
		&f.LastModified,
		&f.ETag,
	}
	if scanErr := res.Scan(dest...); scanErr != nil {
		return scanErr
	}

	switch {
	case f.LastScanOn.Valid:
		interval := time.Duration(f.RefreshRate) * time.Second
		f.LastScanOn.Time = f.LastScanOn.Time.Add(interval)
	default:
		// Never scanned before.
		f.FirstFetch = true
		f.LastScanOn.Time = time.Now()
		f.LastScanOn.Valid = true
	}

	f.FetchURI = f.URI
	return nil
}
// loadFeeds runs the fetchFeeds query and returns a single-use iterator over
// the resulting feeds.
//
// If the query fails, an empty iterator and the query error are returned.
// Otherwise the returned error is nil and rows are scanned lazily as the
// iterator is consumed.
//
// The result set is closed when iteration ends for any reason: exhaustion, a
// scan failure, or the consumer stopping early. Previously the rows were left
// open on the latter two paths, which kept a connection checked out.
//
// The iter.Seq signature has no error channel, so a scan failure simply ends
// the iteration, as it did before.
// NOTE(review): the iterator must be ranged over for the rows to be released.
func loadFeeds(ctx context.Context, db *sql.DB) (iter.Seq[Feed], error) {
	rows, err := db.QueryContext(ctx, fetchFeeds)
	if err != nil {
		return slices.Values([]Feed{}), err
	}

	return func(yield func(Feed) bool) {
		// Close is idempotent, so this is safe even after Next has already
		// closed the rows on exhaustion.
		defer rows.Close()

		for rows.Next() {
			var f Feed
			if err := f.Scan(rows); err != nil {
				return
			}
			if !yield(f) {
				return
			}
		}
	}, nil
}
// storeFeed persists a parsed twtxt file inside a single transaction:
//
//  1. the feed itself, with last_scan_on set to the current time;
//  2. every twt in the file, with its subject tag, mentions and tags;
//  3. every feed discovered through "follow" metadata or through mentions,
//     with a NULL last_scan_on.
//
// All feeds are inserted with a refresh rate of 600 seconds. Both insert
// statements ignore rows that already exist. Any failure rolls the whole
// transaction back and returns the error unchanged.
//
// The feed ID is derived from the twter's HashingURI, falling back to its URI
// when HashingURI is empty. The same value is stored in the uri column, so the
// stored URI always matches the one the ID was derived from. Previously an
// empty HashingURI was stored as-is.
func storeFeed(db *sql.DB, f types.TwtFile) error {
	loadTS := time.Now()
	refreshRate := 600

	twter := f.Twter()
	feedURI := cmp.Or(twter.HashingURI, twter.URI)
	feedID := urlNS.UUID5(feedURI)

	tx, err := db.Begin()
	if err != nil {
		return err
	}
	// Rollback after a successful Commit is a no-op, so its error is
	// deliberately ignored.
	defer tx.Rollback()

	// "follow" values look like "<nick> <http(s) url>"; split at the first
	// occurrence of "http" and skip entries that do not contain one.
	follows := f.Info().GetAll("follow")
	followMap := make(map[string]string, len(follows))
	for _, follow := range follows {
		nick, rest, ok := strings.Cut(follow.Value(), "http")
		if !ok {
			continue
		}
		nick = strings.TrimSpace(nick)
		uri := "http" + strings.TrimSpace(rest)
		if _, err := url.Parse(uri); err != nil {
			continue
		}
		followMap[nick] = uri
	}

	_, err = tx.Exec(
		insertFeed,
		feedID,
		feedURI,
		twter.DomainNick(),
		loadTS,
		refreshRate,
	)
	if err != nil {
		return err
	}

	for _, twt := range f.Twts() {
		mentions := make(uuids, 0, len(twt.Mentions()))
		for _, mention := range twt.Mentions() {
			who := mention.Twter()
			// Mentioned feeds are queued for discovery as well.
			followMap[who.Nick] = who.URI
			mentions = append(mentions, urlNS.UUID5(who.URI))
		}

		tags := make(strList, 0, len(twt.Tags()))
		for _, tag := range twt.Tags() {
			tags = append(tags, tag.Text())
		}

		subjectTag := ""
		if subject := twt.Subject(); subject != nil {
			if tag, ok := subject.Tag().(*lextwt.Tag); ok && tag != nil {
				subjectTag = tag.Text()
			}
		}

		_, err = tx.Exec(
			insertTwt,
			feedID,
			twt.Hash(),
			subjectTag,
			twt.Created(),
			fmt.Sprint(twt),
			mentions.ToStrList(),
			tags,
		)
		if err != nil {
			return err
		}
	}

	for nick, uri := range followMap {
		_, err = tx.Exec(
			insertFeed,
			urlNS.UUID5(uri),
			uri,
			nick,
			nil, // never scanned
			refreshRate,
		)
		if err != nil {
			return err
		}
	}

	return tx.Commit()
}
// MakeHTTPRequest builds the GET request used to fetch the feed from FetchURI
// and sets State to "fetch".
//
// Feeds whose FetchURI contains "lublin.se" are rejected with an error that
// wraps ErrPermanentlyDead. A failure to construct the request is returned
// wrapped as well.
//
// The request carries:
//   - Accept: text/plain
//   - If-Modified-Since, only when LastModified is valid. The condition was
//     previously inverted, which sent the zero time for feeds without a stored
//     value and nothing for feeds that had one.
//   - If-None-Match, only when ETag is valid.
//   - User-Agent "xt/<Version>", extended with "(+<DiscloseFeedURL>;
//     @<DiscloseNick>)" when both disclosure fields are non-empty.
func (f *Feed) MakeHTTPRequest(ctx context.Context) (*http.Request, error) {
	f.State = "fetch"

	if strings.Contains(f.FetchURI, "lublin.se") {
		return nil, fmt.Errorf("%w: permaban: %s", ErrPermanentlyDead, f.URI)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, f.FetchURI, nil)
	if err != nil {
		return nil, fmt.Errorf("creating HTTP request failed: %w", err)
	}

	req.Header.Set("Accept", "text/plain")

	if f.LastModified.Valid {
		// http.TimeFormat hard-codes "GMT", so the time must be in UTC for
		// the formatted value to be correct.
		req.Header.Set("If-Modified-Since", f.LastModified.Time.UTC().Format(http.TimeFormat))
	}
	if f.ETag.Valid {
		req.Header.Set("If-None-Match", f.ETag.String)
	}

	userAgent := fmt.Sprintf("xt/%s", f.Version)
	if f.DiscloseFeedURL != "" && f.DiscloseNick != "" {
		userAgent = fmt.Sprintf("xt/%s (+%s; @%s)", f.Version, f.DiscloseFeedURL, f.DiscloseNick)
	}
	req.Header.Set("User-Agent", userAgent)

	return req, nil
}