diff --git a/ffmpeg/ffmpeg.c b/ffmpeg/ffmpeg.c index 39cc9e1..538ab57 100644 --- a/ffmpeg/ffmpeg.c +++ b/ffmpeg/ffmpeg.c @@ -294,8 +294,6 @@ ThumbContext *create_thumb_context(AVStream *stream, AVFrame *frame) { } int frames_in_128mb = (1 << 30) / (av_get_bits_per_pixel(thumb_ctx->desc) * frame->height * frame->width); thumb_ctx->max_frames = FFMIN(nb_frames, frames_in_128mb); -// thumb_ctx->hist_size = 0; -// thumb_ctx->alpha = 0; int i; for (i = 0; i < thumb_ctx->desc->nb_components; i++) { thumb_ctx->hist_size += 1 << thumb_ctx->desc->comp[i].depth; @@ -353,19 +351,6 @@ static double root_mean_square_error(const int *hist, const double *median, size return sum_sq_err; } -static int alpha_check(const AVFrame *frame, const uint64_t flags, const int last_hist_num) { - if (flags & AV_PIX_FMT_FLAG_PAL) { - for (int i = 3; i <= 1023; i += 4) { - if (frame->data[1][i] != 255) { - return 1; - } - } - } else if (flags & AV_PIX_FMT_FLAG_ALPHA && last_hist_num < frame->width * frame->height) { - return 1; - } - return 0; -} - void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) { const AVPixFmtDescriptor *desc = thumb_ctx->desc; thumb_ctx->frames[n].frame = frame; @@ -420,10 +405,17 @@ void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) { } } -static AVFrame *get_best_frame(ThumbContext *thumb_ctx) { +int find_best_frame_index(ThumbContext *thumb_ctx) { + int i, j, n = 0, m = thumb_ctx->n, *hist = NULL; + double *median = thumb_ctx->median; + for (j = 0; j < m; j++) { + hist = thumb_ctx->frames[j].hist; + for (i = 0; i < thumb_ctx->hist_size; i++) { + median[i] += (double) hist[i] / m; + } + } struct thumb_frame *t_frame = NULL; double min_sum_sq_err = DBL_MAX, sum_sq_err = 0; - int i, n = 0; for (i = 0; i < thumb_ctx->n; i++) { t_frame = thumb_ctx->frames + i; sum_sq_err = root_mean_square_error(t_frame->hist, thumb_ctx->median, thumb_ctx->hist_size); @@ -432,21 +424,9 @@ static AVFrame *get_best_frame(ThumbContext *thumb_ctx) { n = i; } } - thumb_ctx->alpha = alpha_check( - thumb_ctx->frames[n].frame, - thumb_ctx->desc->flags, - thumb_ctx->frames[n].hist[thumb_ctx->hist_size - 1]); - return thumb_ctx->frames[n].frame; + return n; } -AVFrame *process_frames(ThumbContext *thumb_ctx) { - int i, j, *hist = NULL, n = thumb_ctx->n; - double *median = thumb_ctx->median; - for (j = 0; j < n; j++) { - hist = thumb_ctx->frames[j].hist; - for (i = 0; i < thumb_ctx->hist_size; i++) { - median[i] += (double) hist[i] / n; - } - } - return get_best_frame(thumb_ctx); +AVFrame *select_frame(ThumbContext *thumb_ctx, int n) { + return thumb_ctx->frames[n].frame; } \ No newline at end of file diff --git a/ffmpeg/ffmpeg.go b/ffmpeg/ffmpeg.go index 978c18b..2e61683 100644 --- a/ffmpeg/ffmpeg.go +++ b/ffmpeg/ffmpeg.go @@ -9,6 +9,7 @@ import ( "context" "github.com/cshum/imagor/vips/pointer" "io" + "math" "time" "unsafe" ) @@ -31,39 +32,37 @@ type Metadata struct { FPS int `json:"fps,omitempty"` HasVideo bool `json:"has_video"` HasAudio bool `json:"has_audio"` - HasAlpha bool `json:"has_alpha"` } type AVContext struct { - context context.Context - opaque unsafe.Pointer - reader io.Reader - seeker io.Seeker - formatContext *C.AVFormatContext - stream *C.AVStream - codecContext *C.AVCodecContext - thumbContext *C.ThumbContext - selectedFrame *C.AVFrame - outputFrame *C.AVFrame - durationInFormat bool - + context context.Context + opaque unsafe.Pointer + reader io.Reader + seeker io.Seeker + formatContext *C.AVFormatContext + stream *C.AVStream + codecContext *C.AVCodecContext + thumbContext *C.ThumbContext + selectedIndex C.int + frame *C.AVFrame + durationInFormat bool orientation int size int64 duration time.Duration - frameAt int + indexAt C.int durationAt time.Duration width, height int title, artist string hasVideo, hasAudio bool - hasAlpha bool closed bool } func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContext, error) { av := &AVContext{ - context: ctx, - reader: reader, - size: size, + context: ctx, + reader: reader, + size: size, + selectedIndex: -1, } if seeker, ok := reader.(io.Seeker); ok { av.seeker = seeker @@ -81,36 +80,30 @@ func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContex if err := createDecoder(av); err != nil { return av, err } - if err := createThumbContext(av); err != nil { - return av, err - } - if err := convertFrameToRGB(av); err != nil { - return av, err - } return av, nil } -func closeAVContext(av *AVContext) { - if !av.closed { - if av.outputFrame != nil { - C.av_frame_free(&av.outputFrame) - } - if av.thumbContext != nil { - C.free_thumb_context(av.thumbContext) - av.selectedFrame = nil - } - if av.codecContext != nil { - C.avcodec_free_context(&av.codecContext) - } - if av.formatContext != nil { - C.free_format_context(av.formatContext) - } - pointer.Unref(av.opaque) +func (av *AVContext) ProcessFrames() (err error) { + if av.thumbContext == nil { + return createThumbContext(av) } + return } -func (av *AVContext) Export() (buf []byte, err error) { - return exportBuffer(av) +func (av *AVContext) Export(bands int) (buf []byte, err error) { + if err = av.ProcessFrames(); err != nil { + return + } + if av.selectedIndex < 0 { + findBestFrameIndex(av) + } + if bands < 3 || bands > 4 { + bands = 3 + } + if err = convertFrameToRGB(av, bands); err != nil { + return + } + return exportBuffer(av, bands) } func (av *AVContext) Close() { @@ -120,7 +113,7 @@ func (av *AVContext) Close() { func (av *AVContext) Metadata() *Metadata { var fps float64 if av.durationAt > 0 { - fps = float64(av.frameAt) * float64(time.Second) / float64(av.durationAt) + fps = float64(av.indexAt) * float64(time.Second) / float64(av.durationAt) } return &Metadata{ Orientation: av.orientation, @@ -129,10 +122,28 @@ func (av *AVContext) Metadata() *Metadata { Height: av.height, Title: av.title, Artist: av.artist, - FPS: int(fps), + FPS: int(math.Round(fps)), HasVideo: av.hasVideo, HasAudio: av.hasAudio, - HasAlpha: av.hasAlpha, + } +} + +func closeAVContext(av *AVContext) { + if !av.closed { + if av.frame != nil { + C.av_frame_free(&av.frame) + } + if av.thumbContext != nil { + C.free_thumb_context(av.thumbContext) + } + if av.codecContext != nil { + C.avcodec_free_context(&av.codecContext) + } + if av.formatContext != nil { + C.free_format_context(av.formatContext) + } + pointer.Unref(av.opaque) + av.closed = true } } @@ -147,7 +158,7 @@ func createFormatContext(av *AVContext, callbackFlags C.int) error { pointer.Unref(av.opaque) return avError(intErr) } - metaData(av) + metadata(av) duration(av) err := findStreams(av) if err != nil { @@ -157,7 +168,7 @@ func createFormatContext(av *AVContext, callbackFlags C.int) error { return err } -func metaData(av *AVContext) { +func metadata(av *AVContext) { var artist, title *C.char C.get_metadata(av.formatContext, &artist, &title) av.artist = C.GoString(artist) @@ -195,8 +206,8 @@ func createDecoder(av *AVContext) error { return nil } -func incrementDuration(av *AVContext, frame *C.AVFrame, i int) { - av.frameAt = i +func incrementDuration(av *AVContext, frame *C.AVFrame, i C.int) { + av.indexAt = i if frame.pts != C.AV_NOPTS_VALUE { ptsToNano := C.int64_t(1000000000 * av.stream.time_base.num / av.stream.time_base.den) newDuration := time.Duration(frame.pts * ptsToNano) @@ -260,7 +271,7 @@ func populateThumbContext(av *AVContext, frames chan *C.AVFrame, done <-chan str if err < 0 { break } - incrementDuration(av, frame, int(i)) + incrementDuration(av, frame, i) frames <- frame frame = nil } @@ -275,32 +286,31 @@ func populateThumbContext(av *AVContext, frames chan *C.AVFrame, done <-chan str if err != 0 && err != C.int(ErrEOF) { return avError(err) } - av.selectedFrame = C.process_frames(av.thumbContext) - if av.selectedFrame == nil { - return ErrNoMem - } - av.hasAlpha = av.thumbContext.alpha != 0 return nil } -func convertFrameToRGB(av *AVContext) error { - av.outputFrame = C.convert_frame_to_rgb(av.selectedFrame, av.thumbContext.alpha) - if av.outputFrame == nil { +func findBestFrameIndex(av *AVContext) { + av.selectedIndex = C.find_best_frame_index(av.thumbContext) +} + +func convertFrameToRGB(av *AVContext, bands int) error { + var alpha int + if bands == 4 { + alpha = 1 + } + av.frame = C.convert_frame_to_rgb( + C.select_frame(av.thumbContext, av.selectedIndex), C.int(alpha)) + if av.frame == nil { return ErrNoMem } return nil } -func exportBuffer(av *AVContext) ([]byte, error) { - if av.outputFrame == nil { +func exportBuffer(av *AVContext, bands int) ([]byte, error) { + if av.frame == nil { return nil, ErrInvalidData } - size := av.height * av.width - if av.hasAlpha { - size *= 4 - } else { - size *= 3 - } - buf := C.GoBytes(unsafe.Pointer(av.outputFrame.data[0]), C.int(size)) + size := av.height * av.width * bands + buf := C.GoBytes(unsafe.Pointer(av.frame.data[0]), C.int(size)) return buf, nil } diff --git a/ffmpeg/ffmpeg.h b/ffmpeg/ffmpeg.h index ed78e2b..e65925c 100644 --- a/ffmpeg/ffmpeg.h +++ b/ffmpeg/ffmpeg.h @@ -27,7 +27,7 @@ struct thumb_frame { }; typedef struct ThumbContext { - int n, alpha, max_frames; + int n, max_frames; struct thumb_frame *frames; double *median; const AVPixFmtDescriptor *desc; @@ -59,7 +59,9 @@ ThumbContext *create_thumb_context(AVStream *stream, AVFrame *frame); void free_thumb_context(ThumbContext *thumb_ctx); -AVFrame *process_frames(ThumbContext *thumb_ctx); +int find_best_frame_index(ThumbContext *thumb_ctx); + +AVFrame *select_frame(ThumbContext *thumb_ctx, int i); void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame); diff --git a/ffmpeg/ffmpeg_test.go b/ffmpeg/ffmpeg_test.go index 72587cf..640d65f 100644 --- a/ffmpeg/ffmpeg_test.go +++ b/ffmpeg/ffmpeg_test.go @@ -57,7 +57,8 @@ func TestAVContext(t *testing.T) { av, err := LoadAVContext(ctx, reader, stats.Size()) require.NoError(t, err) defer av.Close() - + err = av.ProcessFrames() + require.NoError(t, err) meta := av.Metadata() metaBuf, err := json.Marshal(meta) require.NoError(t, err) @@ -67,13 +68,9 @@ func TestAVContext(t *testing.T) { } else { require.NoError(t, os.WriteFile(goldenFile, metaBuf, 0666)) } - - buf, err := av.Export() + bands := 4 + buf, err := av.Export(bands) require.NoError(t, err) - bands := 3 - if meta.HasAlpha { - bands = 4 - } img, err := vips.LoadImageFromMemory(buf, meta.Width, meta.Height, bands) require.NoError(t, err) buf, err = img.ExportJpeg(nil) diff --git a/processor.go b/processor.go index ecfd1d9..ae1cf3b 100644 --- a/processor.go +++ b/processor.go @@ -112,6 +112,23 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp return } defer av.Close() + bands := 3 + for _, filter := range params.Filters { + switch filter.Name { + case "format": + if s := strings.ToLower(filter.Args); s == "webp" || s == "png" { + switch mime.Extension() { + case ".webm", ".flv", ".mov", ".avi": + bands = 4 + } + break + } + case "process_frames": + if err = av.ProcessFrames(); err != nil { + return + } + } + } meta := av.Metadata() if params.Meta { out = imagor.NewBlobFromJsonMarshal(Metadata{ @@ -129,17 +146,13 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp case 8: filters = append(filters, imagorpath.Filter{Name: "orient", Args: "90"}) } - buf, err := av.Export() + buf, err := av.Export(bands) if err != nil || len(buf) == 0 { if err == nil { err = imagor.ErrUnsupportedFormat } return } - bands := 3 - if meta.HasAlpha { - bands = 4 - } out = imagor.NewBlobFromMemory(buf, meta.Width, meta.Height, bands) if len(filters) > 0 { diff --git a/processor_test.go b/processor_test.go index 68d0dd4..33625d7 100644 --- a/processor_test.go +++ b/processor_test.go @@ -42,12 +42,14 @@ func TestProcessor(t *testing.T) { }) doGoldenTests(t, filepath.Join(testDataDir, "golden/result"), []test{ {name: "mkv", path: "fit-in/100x100/everybody-betray-me.mkv"}, + {name: "mkv meta process_frames", path: "meta/filters:process_frames()/everybody-betray-me.mkv"}, {name: "mkv meta", path: "meta/everybody-betray-me.mkv"}, {name: "mp4", path: "200x100/schizo_0.mp4"}, {name: "mp4 orient 90", path: "220x100/schizo_90.mp4"}, {name: "mp4 orient 180", path: "200x100/schizo_180.mp4"}, {name: "mp4 orient 270", path: "200x100/schizo_270.mp4"}, {name: "image", path: "fit-in/100x100/demo.png"}, + {name: "alpha", path: "fit-in/filters:format(webp)/alpha-webm.webm"}, {name: "corrupted", path: "fit-in/100x100/corrupt/everybody-betray-me.mkv", expectCode: 406}, }, WithDebug(true), WithLogger(zap.NewExample())) doGoldenTests(t, filepath.Join(testDataDir, "golden/result-fallback-image"), []test{ diff --git a/testdata/golden/meta/alpha-webm.webm.meta.json b/testdata/golden/meta/alpha-webm.webm.meta.json index 74760fe..e45c027 100644 --- a/testdata/golden/meta/alpha-webm.webm.meta.json +++ b/testdata/golden/meta/alpha-webm.webm.meta.json @@ -1 +1 @@ -{"orientation":1,"duration":12040,"width":720,"height":576,"fps":25,"has_video":true,"has_audio":false,"has_alpha":true} \ No newline at end of file +{"orientation":1,"duration":12040,"width":720,"height":576,"fps":25,"has_video":true,"has_audio":false} \ No newline at end of file diff --git a/testdata/golden/meta/everybody-betray-me.mkv.meta.json b/testdata/golden/meta/everybody-betray-me.mkv.meta.json index d656fa9..cb2195e 100644 --- a/testdata/golden/meta/everybody-betray-me.mkv.meta.json +++ b/testdata/golden/meta/everybody-betray-me.mkv.meta.json @@ -1 +1 @@ -{"orientation":1,"duration":7407,"width":640,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} \ No newline at end of file +{"orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true} \ No newline at end of file diff --git a/testdata/golden/meta/macabre.mp4.meta.json b/testdata/golden/meta/macabre.mp4.meta.json index 874b345..419bf8d 100644 --- a/testdata/golden/meta/macabre.mp4.meta.json +++ b/testdata/golden/meta/macabre.mp4.meta.json @@ -1 +1 @@ -{"orientation":1,"duration":3925,"width":492,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} \ No newline at end of file +{"orientation":1,"duration":3925,"width":492,"height":360,"fps":30,"has_video":true,"has_audio":true} \ No newline at end of file diff --git a/testdata/golden/meta/schizo.flv.meta.json b/testdata/golden/meta/schizo.flv.meta.json index be102a6..3e24c24 100644 --- a/testdata/golden/meta/schizo.flv.meta.json +++ b/testdata/golden/meta/schizo.flv.meta.json @@ -1 +1 @@ -{"orientation":1,"duration":2560,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} \ No newline at end of file +{"orientation":1,"duration":2560,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true} \ No newline at end of file diff --git a/testdata/golden/meta/schizo_0.mp4.meta.json b/testdata/golden/meta/schizo_0.mp4.meta.json index 9c3ecd3..3dda84e 100644 --- a/testdata/golden/meta/schizo_0.mp4.meta.json +++ b/testdata/golden/meta/schizo_0.mp4.meta.json @@ -1 +1 @@ -{"orientation":1,"duration":2544,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} \ No newline at end of file +{"orientation":1,"duration":2544,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true} \ No newline at end of file diff --git a/testdata/golden/meta/schizo_180.mp4.meta.json b/testdata/golden/meta/schizo_180.mp4.meta.json index 3004de4..f992f3e 100644 --- a/testdata/golden/meta/schizo_180.mp4.meta.json +++ b/testdata/golden/meta/schizo_180.mp4.meta.json @@ -1 +1 @@ -{"orientation":3,"duration":2544,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} \ No newline at end of file +{"orientation":3,"duration":2544,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true} \ No newline at end of file diff --git a/testdata/golden/meta/schizo_270.mp4.meta.json b/testdata/golden/meta/schizo_270.mp4.meta.json index 9c1e64f..10bc562 100644 --- a/testdata/golden/meta/schizo_270.mp4.meta.json +++ b/testdata/golden/meta/schizo_270.mp4.meta.json @@ -1 +1 @@ -{"orientation":6,"duration":2544,"width":360,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} \ No newline at end of file +{"orientation":6,"duration":2544,"width":360,"height":480,"fps":30,"has_video":true,"has_audio":true} \ No newline at end of file diff --git a/testdata/golden/meta/schizo_90.mp4.meta.json b/testdata/golden/meta/schizo_90.mp4.meta.json index f2bb5f1..ee3a3aa 100644 --- a/testdata/golden/meta/schizo_90.mp4.meta.json +++ b/testdata/golden/meta/schizo_90.mp4.meta.json @@ -1 +1 @@ -{"orientation":8,"duration":2544,"width":360,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} \ No newline at end of file +{"orientation":8,"duration":2544,"width":360,"height":480,"fps":30,"has_video":true,"has_audio":true} \ No newline at end of file diff --git a/testdata/golden/result/fit-in/filters%3Aformat%28webp%29/alpha-webm.webm b/testdata/golden/result/fit-in/filters%3Aformat%28webp%29/alpha-webm.webm new file mode 100644 index 0000000..5809100 Binary files /dev/null and b/testdata/golden/result/fit-in/filters%3Aformat%28webp%29/alpha-webm.webm differ diff --git a/testdata/golden/result/meta/everybody-betray-me.mkv b/testdata/golden/result/meta/everybody-betray-me.mkv index 8af18c2..045e431 100644 --- a/testdata/golden/result/meta/everybody-betray-me.mkv +++ b/testdata/golden/result/meta/everybody-betray-me.mkv @@ -1 +1 @@ -{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} \ No newline at end of file +{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"has_video":true,"has_audio":true} \ No newline at end of file diff --git a/testdata/golden/result/meta/filters%3Aprocess_frames%28%29/everybody-betray-me.mkv b/testdata/golden/result/meta/filters%3Aprocess_frames%28%29/everybody-betray-me.mkv new file mode 100644 index 0000000..87a8093 --- /dev/null +++ b/testdata/golden/result/meta/filters%3Aprocess_frames%28%29/everybody-betray-me.mkv @@ -0,0 +1 @@ +{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true} \ No newline at end of file