xt/fetcher.go
2025-02-15 16:12:42 -07:00

156 lines
3.0 KiB
Go

package main
import (
"context"
"errors"
"fmt"
"net"
"net/http"
"sync"
"time"
)
// Sentinel errors describing the outcome of a fetch. Fetch wraps them with
// additional detail, so match them with errors.Is rather than ==.
var (
	// ErrUnmodified marks a 304 Not Modified reply.
	ErrUnmodified = errors.New("unmodified")

	// ErrPermanentlyDead marks replies that Fetch treats as final
	// (403, 404 and 410).
	ErrPermanentlyDead = errors.New("permanently dead")

	// ErrTemporarilyDead marks transport failures and replies that Fetch
	// treats as recoverable (400, 406, 502 and 503).
	ErrTemporarilyDead = errors.New("temporarily dead")

	// ErrParseFailed is not produced by the fetching code in this file.
	// NOTE(review): presumably reported by the feed parser - confirm.
	ErrParseFailed = errors.New("parse failed")
)
// Response pairs the Feed that was requested with the HTTP response it
// produced.
//
// The embedded *http.Response is nil when Fetch could not build or send the
// request; err then holds the reason.
type Response struct {
	// Request is the feed this response belongs to.
	Request *Feed

	// Response is the raw HTTP reply, promoted so its fields (Header, Body,
	// StatusCode, ...) are reachable directly on the Response value.
	*http.Response

	// err records why the fetch did not yield a usable 200 reply. It stays
	// nil on success.
	err error
}
// ETag reports the value of the reply's ETag header, or "" when the header
// is absent.
func (r *Response) ETag() string {
	etag := r.Header.Get("ETag")
	return etag
}
// Read fills b from the reply body, which lets a *Response be used wherever
// an io.Reader is expected.
func (r *Response) Read(b []byte) (int, error) {
	n, err := r.Body.Read(b)
	return n, err
}
// Close closes the Response.Body, which is necessary to free up resources.
//
// It is safe to call on a Response that never received an HTTP reply, in
// which case it does nothing.
func (r *Response) Close() {
	// Fetch returns a Response without an embedded *http.Response when the
	// request could not be built or sent; dereferencing Body then would
	// panic, typically inside a caller's deferred Close.
	if r == nil || r.Response == nil || r.Body == nil {
		return
	}
	// Close has no error result, and a failure to close a body that was only
	// read from leaves nothing for the caller to act on.
	_ = r.Body.Close()
}
// ContentType reports the reply's Content-Type header exactly as sent, or ""
// when the header is absent.
func (r *Response) ContentType() string {
	contentType := r.Header.Get("Content-Type")
	return contentType
}
// LastModified reports the time carried by the reply's Last-Modified header.
//
// When the header is missing or does not hold a valid HTTP date, the current
// time is returned instead.
func (r *Response) LastModified() time.Time {
	// http.ParseTime tries every date layout HTTP/1.1 allows (http.TimeFormat,
	// RFC 850 and ANSI C asctime). Parsing with http.TimeFormat alone would
	// reject the two legacy layouts and silently fall back to time.Now.
	if lm, err := http.ParseTime(r.Header.Get("Last-Modified")); err == nil {
		return lm
	}
	return time.Now()
}
// httpFetcher retrieves feeds over HTTP through a single shared client.
type httpFetcher struct {
	client *http.Client
}

// NewHTTPFetcher builds an httpFetcher whose client uses a dedicated
// transport with short dial, idle and TLS handshake timeouts. The client
// itself has no overall timeout.
func NewHTTPFetcher() *httpFetcher {
	dialer := &net.Dialer{
		Timeout:   5 * time.Second,
		KeepAlive: 5 * time.Second,
	}

	transport := &http.Transport{
		Proxy:       http.ProxyFromEnvironment,
		DialContext: dialer.DialContext,
		// Left off explicitly; per the net/http documentation this flag
		// governs whether HTTP/2 is enabled when a custom DialContext is set.
		ForceAttemptHTTP2:     false,
		MaxIdleConns:          100,
		IdleConnTimeout:       10 * time.Second,
		TLSHandshakeTimeout:   10 * time.Second,
		ExpectContinueTimeout: 1 * time.Second,
	}

	return &httpFetcher{
		client: &http.Client{Transport: transport},
	}
}
// Fetch performs the HTTP request for the given feed and classifies the
// outcome.
//
// It always returns a non-nil *Response whose Request field is the feed:
//
//   - if the request cannot be built, err is the error from MakeHTTPRequest
//     and no HTTP reply is attached;
//   - if the request cannot be sent, err wraps both ErrTemporarilyDead and
//     the transport error, and no HTTP reply is attached;
//   - otherwise the reply is attached and err is nil for 200, wraps
//     ErrUnmodified for 304, ErrTemporarilyDead for 400/406/502/503,
//     ErrPermanentlyDead for 403/404/410, and is a plain error carrying the
//     status line for any other code.
//
// Whenever a reply is attached its body is still open; the caller is
// responsible for closing it.
func (f *httpFetcher) Fetch(ctx context.Context, request *Feed) *Response {
	response := &Response{Request: request}

	req, err := request.MakeHTTPRequest(ctx)
	if err != nil {
		response.err = err
		return response
	}

	res, err := f.client.Do(req)
	if err != nil {
		// Every transport-level failure, DNS included, is treated as
		// temporary. Both errors are wrapped so callers can match either one
		// with errors.Is / errors.As. A former special case that tested
		// errors.Is(err, &net.DNSError{}) was removed: comparing against a
		// freshly allocated pointer can never succeed, so it never ran.
		response.err = fmt.Errorf("%w: %w", ErrTemporarilyDead, err)
		return response
	}
	response.Response = res

	switch res.StatusCode {
	case http.StatusOK:
		// Success: err stays nil.
	case http.StatusNotModified:
		response.err = fmt.Errorf("%w: %s", ErrUnmodified, res.Status)
	case http.StatusBadRequest,
		http.StatusNotAcceptable,
		http.StatusBadGateway,
		http.StatusServiceUnavailable:
		response.err = fmt.Errorf("%w: %s", ErrTemporarilyDead, res.Status)
	case http.StatusForbidden,
		http.StatusNotFound,
		http.StatusGone:
		response.err = fmt.Errorf("%w: %s", ErrPermanentlyDead, res.Status)
	default:
		// Unclassified status: report it without a sentinel.
		response.err = errors.New(res.Status)
	}
	return response
}
type pool[IN, OUT any] struct {
in chan IN
out chan OUT
Err error
}
func NewFuncPool[IN, OUT any](
ctx context.Context,
size int,
fetch func(ctx context.Context, request IN) OUT,
) (*pool[IN, OUT], func()) {
var wg sync.WaitGroup
in := make(chan IN, size)
out := make(chan OUT, size)
wg.Add(size)
for range size {
go func() {
defer wg.Done()
for request := range in {
select {
case <-ctx.Done():
return
case out <- fetch(ctx, request):
}
}
}()
}
return &pool[IN, OUT]{
in: in,
out: out,
}, func() { close(in); wg.Wait(); close(out) }
}
func (f *pool[IN, OUT]) Fetch(request IN) {
f.in <- request
}
func (f *pool[IN, OUT]) Out() <-chan OUT {
return f.out
}