From 17d4d46d0978d9bab1449ac5bd8bc777bf8ec51c Mon Sep 17 00:00:00 2001
From: Failure
Date: Tue, 6 Aug 2024 08:50:35 -0700
Subject: [PATCH] youtube handling + special site handling framework

---
 cmd/imagorvideo/main.go |  1 +
 config.go               |  6 ++++
 loader.go               | 46 +++++++++++++++++++++++++
 processor.go            | 78 +++++++++++++++++++++++++++++++++++++-----
 4 files changed, 123 insertions(+), 8 deletions(-)
 create mode 100644 loader.go

diff --git a/cmd/imagorvideo/main.go b/cmd/imagorvideo/main.go
index 01bab1a..0c0f6cd 100644
--- a/cmd/imagorvideo/main.go
+++ b/cmd/imagorvideo/main.go
@@ -13,6 +13,7 @@ import (
 func main() {
 	var server = config.CreateServer(
 		os.Args[1:],
+		//imagorvideoextended.LoaderConfig,
 		imagorvideoextended.Config,
 		imagorvideo.Config,
 		vipsconfig.WithVips,
diff --git a/config.go b/config.go
index 206368b..7281752 100644
--- a/config.go
+++ b/config.go
@@ -6,6 +6,12 @@ import (
 	"go.uber.org/zap"
 )
 
+// LoaderConfig returns an imagor.Option that registers a SpecialLoader.
+// fs and cb are accepted to match the signature of Config and are not used.
+func LoaderConfig(fs *flag.FlagSet, cb func() (*zap.Logger, bool)) imagor.Option {
+	return imagor.WithLoaders(NewSpecialLoader())
+}
+
 // Config imagorvideo config.Option
 func Config(fs *flag.FlagSet, cb func() (*zap.Logger, bool)) imagor.Option {
 	var (
diff --git a/loader.go b/loader.go
new file mode 100644
index 0000000..078cb7a
--- /dev/null
+++ b/loader.go
@@ -0,0 +1,46 @@
+package imagorvideoextended
+
+import (
+	"github.com/cshum/imagor"
+	"net/http"
+	"net/url"
+	"strings"
+)
+
+/*
+	This was started to handle YouTube, however it turns out they leak just enough info to be useful in the processing
+	phase. This can still be used for sites that have more info in the URL than the connection, though.
+	Currently unloaded to eke out some more perf
+*/
+
+// LoaderOption configures a SpecialLoader created by NewSpecialLoader.
+type LoaderOption func(h *SpecialLoader)
+
+// SpecialLoader HTTP Loader implements imagor.Loader interface
+type SpecialLoader struct {
+}
+
+// NewSpecialLoader returns a SpecialLoader with every given option applied in order.
+func NewSpecialLoader(options ...LoaderOption) *SpecialLoader {
+	loader := &SpecialLoader{}
+	for _, option := range options {
+		option(loader)
+	}
+	return loader
+}
+
+// Get parses key as a URL and dispatches on its hostname with any leading
+// "www." removed. No site-specific cases exist yet, so every key, including
+// one that fails to parse, is rejected with imagor.ErrInvalid.
+func (loader *SpecialLoader) Get(_ *http.Request, key string) (*imagor.Blob, error) {
+	components, err := url.Parse(key)
+	if err != nil {
+		return nil, imagor.ErrInvalid
+	}
+	// Switch on the trimmed name so "www.example.com" and "example.com" share a case.
+	hostname := strings.TrimPrefix(components.Hostname(), "www.")
+	switch hostname {
+	default:
+		return nil, imagor.ErrInvalid
+	}
+}
diff --git a/processor.go b/processor.go
index 0ceb2a7..187c0b0 100644
--- a/processor.go
+++ b/processor.go
@@ -9,7 +9,9 @@ import (
 	"github.com/gabriel-vasile/mimetype"
 	"go.uber.org/zap"
 	"io"
+	"net/url"
 	"os"
+	"regexp"
 	"strings"
 )
 
@@ -41,6 +43,57 @@ func (p *Processor) Shutdown(_ context.Context) error {
 	return nil
 }
 
+// youtubeIdRegex matches a run of 11 characters (the length of a YouTube video
+// ID) that contains no quote, whitespace, or any of & ? / . =
+var youtubeIdRegex = regexp.MustCompile("[^\"&?\\/\\s\\.=]{11}")
+
+// youtubeVideoID returns the video ID carried by a YouTube URL, or "" when it
+// has none. Only the places known to hold an ID are inspected: the first path
+// segment on youtu.be, otherwise the "v" query parameter, otherwise the segment
+// after /embed/, /shorts/, /live/ or /v/. Scanning the whole URL instead would
+// report unrelated 11-character runs, such as "search_quer" in
+// /results?search_query=..., as IDs.
+func youtubeVideoID(hostname string, components *url.URL) string {
+	segments := strings.Split(strings.Trim(components.Path, "/"), "/")
+	candidate := components.Query().Get("v")
+	switch {
+	case hostname == "youtu.be":
+		candidate = segments[0]
+	case candidate == "" && len(segments) > 1:
+		switch segments[0] {
+		case "embed", "shorts", "live", "v":
+			candidate = segments[1]
+		}
+	}
+	return youtubeIdRegex.FindString(candidate)
+}
+
+// specialUrl parses path as a URL and reports whether it points at a site that
+// needs special handling.
+//
+// hostname is the URL's host without port or leading "www.". specialHandler is
+// "youtube" when a YouTube video ID was found and "" otherwise. specialData
+// holds that ID under "id"; it is never nil, even when err is non-nil, so
+// callers that ignore the error get an empty map. err is the url.Parse error.
+func specialUrl(path string) (hostname string, specialHandler string, specialData map[string]string, err error) {
+	specialData = make(map[string]string)
+	components, err := url.Parse(path)
+	if err != nil {
+		return "", "", specialData, err
+	}
+	hostname = strings.TrimPrefix(components.Hostname(), "www.")
+
+	switch hostname {
+	case "youtu.be", "youtube.com", "m.youtube.com":
+		if id := youtubeVideoID(hostname, components); id != "" {
+			specialHandler = "youtube"
+			specialData["id"] = id
+		}
+	}
+
+	return hostname, specialHandler, specialData, nil
+}
+
 func subThumbnail(url string) string {
 	key := os.Getenv("IMAGOR_SECRET")
 	params := imagorpath.Params{
@@ -102,12 +155,18 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
 		return nil, err
 	}
 
+	// Best effort: a URL that does not parse just yields empty special-site fields.
+	hostname, specialHandler, specialData, _ := specialUrl(params.Image)
+
 	doc, err := htmlquery.Parse(strings.NewReader(string(all[:])))
 	meta := Metadata{
-		Format:      strings.TrimPrefix(mime.Extension(), "."),
-		Title:       "",
-		Description: "",
-		Image:       "",
+		Format:         strings.TrimPrefix(mime.Extension(), "."),
+		Title:          "",
+		Description:    "",
+		Image:          "",
+		Hostname:       hostname,
+		SpecialHandler: specialHandler,
+		SpecialData:    specialData,
 	}
 	metaTags := htmlquery.Find(doc, "//meta[@property]")
 	for _, metaTag := range metaTags {
@@ -148,10 +207,13 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
 
 // Metadata imagorvideo metadata
 type Metadata struct {
-	Format      string `json:"format"`
-	Title       string `json:"title"`
-	Description string `json:"description"`
-	Image       string `json:"image"`
+	Format         string            `json:"format"`
+	Title          string            `json:"title"`
+	Description    string            `json:"description"`
+	Image          string            `json:"image"`
+	Hostname       string            `json:"hostname"`
+	SpecialHandler string            `json:"special_handler"`
+	SpecialData    map[string]string `json:"special_data"`
 }
 
 var transPixel = []byte("\x47\x49\x46\x38\x39\x61\x01\x00\x01\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x21\xF9\x04\x01\x00\x00\x00\x00\x2C\x00\x00\x00\x00\x01\x00\x01\x00\x00\x02\x02\x44\x01\x00\x3B")