refactor(ffmpeg): refactor process frames

* refactor best frame process
* refactor best frame process, remove alpha
* move thumb context to export
* process_frames() filter
* test: update golden files
* process_frames() filter
* test: update golden files
* tweak fps
* test: update golden files
* cleanup
2022-10-13 15:23:23 +08:00 · 2022-10-13 15:23:23 +08:00 · 4d71837fb8
commit 4d71837fb8
parent c86b39430d
17 changed files with 127 additions and 122 deletions
--- a/ffmpeg/ffmpeg.c
+++ b/ffmpeg/ffmpeg.c
@@ -294,8 +294,6 @@ ThumbContext *create_thumb_context(AVStream *stream, AVFrame *frame) {
    }
    int frames_in_128mb = (1 << 30) / (av_get_bits_per_pixel(thumb_ctx->desc) * frame->height * frame->width);
    thumb_ctx->max_frames = FFMIN(nb_frames, frames_in_128mb);
-//    thumb_ctx->hist_size = 0;
-//    thumb_ctx->alpha = 0;
    int i;
    for (i = 0; i < thumb_ctx->desc->nb_components; i++) {
        thumb_ctx->hist_size += 1 << thumb_ctx->desc->comp[i].depth;
@@ -353,19 +351,6 @@ static double root_mean_square_error(const int *hist, const double *median, size
    return sum_sq_err;
 }

-static int alpha_check(const AVFrame *frame, const uint64_t flags, const int last_hist_num) {
-    if (flags & AV_PIX_FMT_FLAG_PAL) {
-        for (int i = 3; i <= 1023; i += 4) {
-            if (frame->data[1][i] != 255) {
-                return 1;
-            }
-        }
-    } else if (flags & AV_PIX_FMT_FLAG_ALPHA && last_hist_num < frame->width * frame->height) {
-        return 1;
-    }
-    return 0;
-}
-
 void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) {
    const AVPixFmtDescriptor *desc = thumb_ctx->desc;
    thumb_ctx->frames[n].frame = frame;
@@ -420,10 +405,17 @@ void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) {
    }
 }

-static AVFrame *get_best_frame(ThumbContext *thumb_ctx) {
+int find_best_frame_index(ThumbContext *thumb_ctx) {
+    int i, j, n = 0, m = thumb_ctx->n, *hist = NULL;
+    double *median = thumb_ctx->median;
+    for (j = 0; j < m; j++) {
+        hist = thumb_ctx->frames[j].hist;
+        for (i = 0; i < thumb_ctx->hist_size; i++) {
+            median[i] += (double) hist[i] / m;
+        }
+    }
    struct thumb_frame *t_frame = NULL;
    double min_sum_sq_err = DBL_MAX, sum_sq_err = 0;
-    int i, n = 0;
    for (i = 0; i < thumb_ctx->n; i++) {
        t_frame = thumb_ctx->frames + i;
        sum_sq_err = root_mean_square_error(t_frame->hist, thumb_ctx->median, thumb_ctx->hist_size);
@@ -432,21 +424,9 @@ static AVFrame *get_best_frame(ThumbContext *thumb_ctx) {
            n = i;
        }
    }
-    thumb_ctx->alpha = alpha_check(
-      thumb_ctx->frames[n].frame,
-      thumb_ctx->desc->flags,
-      thumb_ctx->frames[n].hist[thumb_ctx->hist_size - 1]);
-    return thumb_ctx->frames[n].frame;
+    return n;
 }

-AVFrame *process_frames(ThumbContext *thumb_ctx) {
-    int i, j, *hist = NULL, n = thumb_ctx->n;
-    double *median = thumb_ctx->median;
-    for (j = 0; j < n; j++) {
-        hist = thumb_ctx->frames[j].hist;
-        for (i = 0; i < thumb_ctx->hist_size; i++) {
-            median[i] += (double) hist[i] / n;
-        }
-    }
-    return get_best_frame(thumb_ctx);
+AVFrame *select_frame(ThumbContext *thumb_ctx, int n) {
+    return thumb_ctx->frames[n].frame;
 }
--- a/ffmpeg/ffmpeg.go
+++ b/ffmpeg/ffmpeg.go
@@ -9,6 +9,7 @@ import (
 	"context"
 	"github.com/cshum/imagor/vips/pointer"
 	"io"
+	"math"
 	"time"
 	"unsafe"
 )
@@ -31,39 +32,37 @@ type Metadata struct {
 	FPS         int    `json:"fps,omitempty"`
 	HasVideo    bool   `json:"has_video"`
 	HasAudio    bool   `json:"has_audio"`
-	HasAlpha    bool   `json:"has_alpha"`
 }

 type AVContext struct {
-	context          context.Context
-	opaque           unsafe.Pointer
-	reader           io.Reader
-	seeker           io.Seeker
-	formatContext    *C.AVFormatContext
-	stream           *C.AVStream
-	codecContext     *C.AVCodecContext
-	thumbContext     *C.ThumbContext
-	selectedFrame    *C.AVFrame
-	outputFrame      *C.AVFrame
-	durationInFormat bool
-
+	context            context.Context
+	opaque             unsafe.Pointer
+	reader             io.Reader
+	seeker             io.Seeker
+	formatContext      *C.AVFormatContext
+	stream             *C.AVStream
+	codecContext       *C.AVCodecContext
+	thumbContext       *C.ThumbContext
+	selectedIndex      C.int
+	frame              *C.AVFrame
+	durationInFormat   bool
 	orientation        int
 	size               int64
 	duration           time.Duration
-	frameAt            int
+	indexAt            C.int
 	durationAt         time.Duration
 	width, height      int
 	title, artist      string
 	hasVideo, hasAudio bool
-	hasAlpha           bool
 	closed             bool
 }

 func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContext, error) {
 	av := &AVContext{
-		context: ctx,
-		reader:  reader,
-		size:    size,
+		context:       ctx,
+		reader:        reader,
+		size:          size,
+		selectedIndex: -1,
 	}
 	if seeker, ok := reader.(io.Seeker); ok {
 		av.seeker = seeker
@@ -81,36 +80,30 @@ func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContex
 	if err := createDecoder(av); err != nil {
 		return av, err
 	}
-	if err := createThumbContext(av); err != nil {
-		return av, err
-	}
-	if err := convertFrameToRGB(av); err != nil {
-		return av, err
-	}
 	return av, nil
 }

-func closeAVContext(av *AVContext) {
-	if !av.closed {
-		if av.outputFrame != nil {
-			C.av_frame_free(&av.outputFrame)
-		}
-		if av.thumbContext != nil {
-			C.free_thumb_context(av.thumbContext)
-			av.selectedFrame = nil
-		}
-		if av.codecContext != nil {
-			C.avcodec_free_context(&av.codecContext)
-		}
-		if av.formatContext != nil {
-			C.free_format_context(av.formatContext)
-		}
-		pointer.Unref(av.opaque)
+func (av *AVContext) ProcessFrames() (err error) {
+	if av.thumbContext == nil {
+		return createThumbContext(av)
 	}
+	return
 }

-func (av *AVContext) Export() (buf []byte, err error) {
-	return exportBuffer(av)
+func (av *AVContext) Export(bands int) (buf []byte, err error) {
+	if err = av.ProcessFrames(); err != nil {
+		return
+	}
+	if av.selectedIndex < 0 {
+		findBestFrameIndex(av)
+	}
+	if bands < 3 || bands > 4 {
+		bands = 3
+	}
+	if err = convertFrameToRGB(av, bands); err != nil {
+		return
+	}
+	return exportBuffer(av, bands)
 }

 func (av *AVContext) Close() {
@@ -120,7 +113,7 @@ func (av *AVContext) Close() {
 func (av *AVContext) Metadata() *Metadata {
 	var fps float64
 	if av.durationAt > 0 {
-		fps = float64(av.frameAt) * float64(time.Second) / float64(av.durationAt)
+		fps = float64(av.indexAt) * float64(time.Second) / float64(av.durationAt)
 	}
 	return &Metadata{
 		Orientation: av.orientation,
@@ -129,10 +122,28 @@ func (av *AVContext) Metadata() *Metadata {
 		Height:      av.height,
 		Title:       av.title,
 		Artist:      av.artist,
-		FPS:         int(fps),
+		FPS:         int(math.Round(fps)),
 		HasVideo:    av.hasVideo,
 		HasAudio:    av.hasAudio,
-		HasAlpha:    av.hasAlpha,
+	}
+}
+
+func closeAVContext(av *AVContext) {
+	if !av.closed {
+		if av.frame != nil {
+			C.av_frame_free(&av.frame)
+		}
+		if av.thumbContext != nil {
+			C.free_thumb_context(av.thumbContext)
+		}
+		if av.codecContext != nil {
+			C.avcodec_free_context(&av.codecContext)
+		}
+		if av.formatContext != nil {
+			C.free_format_context(av.formatContext)
+		}
+		pointer.Unref(av.opaque)
+		av.closed = true
 	}
 }

@@ -147,7 +158,7 @@ func createFormatContext(av *AVContext, callbackFlags C.int) error {
 		pointer.Unref(av.opaque)
 		return avError(intErr)
 	}
-	metaData(av)
+	metadata(av)
 	duration(av)
 	err := findStreams(av)
 	if err != nil {
@@ -157,7 +168,7 @@ func createFormatContext(av *AVContext, callbackFlags C.int) error {
 	return err
 }

-func metaData(av *AVContext) {
+func metadata(av *AVContext) {
 	var artist, title *C.char
 	C.get_metadata(av.formatContext, &artist, &title)
 	av.artist = C.GoString(artist)
@@ -195,8 +206,8 @@ func createDecoder(av *AVContext) error {
 	return nil
 }

-func incrementDuration(av *AVContext, frame *C.AVFrame, i int) {
-	av.frameAt = i
+func incrementDuration(av *AVContext, frame *C.AVFrame, i C.int) {
+	av.indexAt = i
 	if frame.pts != C.AV_NOPTS_VALUE {
 		ptsToNano := C.int64_t(1000000000 * av.stream.time_base.num / av.stream.time_base.den)
 		newDuration := time.Duration(frame.pts * ptsToNano)
@@ -260,7 +271,7 @@ func populateThumbContext(av *AVContext, frames chan *C.AVFrame, done <-chan str
 		if err < 0 {
 			break
 		}
-		incrementDuration(av, frame, int(i))
+		incrementDuration(av, frame, i)
 		frames <- frame
 		frame = nil
 	}
@@ -275,32 +286,31 @@ func populateThumbContext(av *AVContext, frames chan *C.AVFrame, done <-chan str
 	if err != 0 && err != C.int(ErrEOF) {
 		return avError(err)
 	}
-	av.selectedFrame = C.process_frames(av.thumbContext)
-	if av.selectedFrame == nil {
-		return ErrNoMem
-	}
-	av.hasAlpha = av.thumbContext.alpha != 0
 	return nil
 }

-func convertFrameToRGB(av *AVContext) error {
-	av.outputFrame = C.convert_frame_to_rgb(av.selectedFrame, av.thumbContext.alpha)
-	if av.outputFrame == nil {
+func findBestFrameIndex(av *AVContext) {
+	av.selectedIndex = C.find_best_frame_index(av.thumbContext)
+}
+
+func convertFrameToRGB(av *AVContext, bands int) error {
+	var alpha int
+	if bands == 4 {
+		alpha = 1
+	}
+	av.frame = C.convert_frame_to_rgb(
+		C.select_frame(av.thumbContext, av.selectedIndex), C.int(alpha))
+	if av.frame == nil {
 		return ErrNoMem
 	}
 	return nil
 }

-func exportBuffer(av *AVContext) ([]byte, error) {
-	if av.outputFrame == nil {
+func exportBuffer(av *AVContext, bands int) ([]byte, error) {
+	if av.frame == nil {
 		return nil, ErrInvalidData
 	}
-	size := av.height * av.width
-	if av.hasAlpha {
-		size *= 4
-	} else {
-		size *= 3
-	}
-	buf := C.GoBytes(unsafe.Pointer(av.outputFrame.data[0]), C.int(size))
+	size := av.height * av.width * bands
+	buf := C.GoBytes(unsafe.Pointer(av.frame.data[0]), C.int(size))
 	return buf, nil
 }
--- a/ffmpeg/ffmpeg.h
+++ b/ffmpeg/ffmpeg.h
@@ -27,7 +27,7 @@ struct thumb_frame {
 };

 typedef struct ThumbContext {
-    int n, alpha, max_frames;
+    int n, max_frames;
    struct thumb_frame *frames;
    double *median;
    const AVPixFmtDescriptor *desc;
@@ -59,7 +59,9 @@ ThumbContext *create_thumb_context(AVStream *stream, AVFrame *frame);

 void free_thumb_context(ThumbContext *thumb_ctx);

-AVFrame *process_frames(ThumbContext *thumb_ctx);
+int find_best_frame_index(ThumbContext *thumb_ctx);
+
+AVFrame *select_frame(ThumbContext *thumb_ctx, int i);

 void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame);

--- a/ffmpeg/ffmpeg_test.go
+++ b/ffmpeg/ffmpeg_test.go
@@ -57,7 +57,8 @@ func TestAVContext(t *testing.T) {
 			av, err := LoadAVContext(ctx, reader, stats.Size())
 			require.NoError(t, err)
 			defer av.Close()
-
+			err = av.ProcessFrames()
+			require.NoError(t, err)
 			meta := av.Metadata()
 			metaBuf, err := json.Marshal(meta)
 			require.NoError(t, err)
@@ -67,13 +68,9 @@ func TestAVContext(t *testing.T) {
 			} else {
 				require.NoError(t, os.WriteFile(goldenFile, metaBuf, 0666))
 			}
-
-			buf, err := av.Export()
+			bands := 4
+			buf, err := av.Export(bands)
 			require.NoError(t, err)
-			bands := 3
-			if meta.HasAlpha {
-				bands = 4
-			}
 			img, err := vips.LoadImageFromMemory(buf, meta.Width, meta.Height, bands)
 			require.NoError(t, err)
 			buf, err = img.ExportJpeg(nil)
--- a/processor.go
+++ b/processor.go
@@ -112,6 +112,23 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
 		return
 	}
 	defer av.Close()
+	bands := 3
+	for _, filter := range params.Filters {
+		switch filter.Name {
+		case "format":
+			if s := strings.ToLower(filter.Args); s == "webp" || s == "png" {
+				switch mime.Extension() {
+				case ".webm", ".flv", ".mov", ".avi":
+					bands = 4
+				}
+				break
+			}
+		case "process_frames":
+			if err = av.ProcessFrames(); err != nil {
+				return
+			}
+		}
+	}
 	meta := av.Metadata()
 	if params.Meta {
 		out = imagor.NewBlobFromJsonMarshal(Metadata{
@@ -129,17 +146,13 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
 	case 8:
 		filters = append(filters, imagorpath.Filter{Name: "orient", Args: "90"})
 	}
-	buf, err := av.Export()
+	buf, err := av.Export(bands)
 	if err != nil || len(buf) == 0 {
 		if err == nil {
 			err = imagor.ErrUnsupportedFormat
 		}
 		return
 	}
-	bands := 3
-	if meta.HasAlpha {
-		bands = 4
-	}
 	out = imagor.NewBlobFromMemory(buf, meta.Width, meta.Height, bands)

 	if len(filters) > 0 {
--- a/processor_test.go
+++ b/processor_test.go
@@ -42,12 +42,14 @@ func TestProcessor(t *testing.T) {
 	})
 	doGoldenTests(t, filepath.Join(testDataDir, "golden/result"), []test{
 		{name: "mkv", path: "fit-in/100x100/everybody-betray-me.mkv"},
+		{name: "mkv meta process_frames", path: "meta/filters:process_frames()/everybody-betray-me.mkv"},
 		{name: "mkv meta", path: "meta/everybody-betray-me.mkv"},
 		{name: "mp4", path: "200x100/schizo_0.mp4"},
 		{name: "mp4 orient 90", path: "220x100/schizo_90.mp4"},
 		{name: "mp4 orient 180", path: "200x100/schizo_180.mp4"},
 		{name: "mp4 orient 270", path: "200x100/schizo_270.mp4"},
 		{name: "image", path: "fit-in/100x100/demo.png"},
+		{name: "alpha", path: "fit-in/filters:format(webp)/alpha-webm.webm"},
 		{name: "corrupted", path: "fit-in/100x100/corrupt/everybody-betray-me.mkv", expectCode: 406},
 	}, WithDebug(true), WithLogger(zap.NewExample()))
 	doGoldenTests(t, filepath.Join(testDataDir, "golden/result-fallback-image"), []test{
--- a/testdata/golden/meta/alpha-webm.webm.meta.json
+++ b/testdata/golden/meta/alpha-webm.webm.meta.json
@ -1 +1 @@
-{"orientation":1,"duration":12040,"width":720,"height":576,"fps":25,"has_video":true,"has_audio":false,"has_alpha":true}
+{"orientation":1,"duration":12040,"width":720,"height":576,"fps":25,"has_video":true,"has_audio":false}
--- a/testdata/golden/meta/everybody-betray-me.mkv.meta.json
+++ b/testdata/golden/meta/everybody-betray-me.mkv.meta.json
@ -1 +1 @@
-{"orientation":1,"duration":7407,"width":640,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
+{"orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true}
--- a/testdata/golden/meta/macabre.mp4.meta.json
+++ b/testdata/golden/meta/macabre.mp4.meta.json
@ -1 +1 @@
-{"orientation":1,"duration":3925,"width":492,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
+{"orientation":1,"duration":3925,"width":492,"height":360,"fps":30,"has_video":true,"has_audio":true}
--- a/testdata/golden/meta/schizo.flv.meta.json
+++ b/testdata/golden/meta/schizo.flv.meta.json
@ -1 +1 @@
-{"orientation":1,"duration":2560,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
+{"orientation":1,"duration":2560,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}
--- a/testdata/golden/meta/schizo_0.mp4.meta.json
+++ b/testdata/golden/meta/schizo_0.mp4.meta.json
@ -1 +1 @@
-{"orientation":1,"duration":2544,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
+{"orientation":1,"duration":2544,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}
--- a/testdata/golden/meta/schizo_180.mp4.meta.json
+++ b/testdata/golden/meta/schizo_180.mp4.meta.json
@ -1 +1 @@
-{"orientation":3,"duration":2544,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
+{"orientation":3,"duration":2544,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}
--- a/testdata/golden/meta/schizo_270.mp4.meta.json
+++ b/testdata/golden/meta/schizo_270.mp4.meta.json
@ -1 +1 @@
-{"orientation":6,"duration":2544,"width":360,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
+{"orientation":6,"duration":2544,"width":360,"height":480,"fps":30,"has_video":true,"has_audio":true}
--- a/testdata/golden/meta/schizo_90.mp4.meta.json
+++ b/testdata/golden/meta/schizo_90.mp4.meta.json
@ -1 +1 @@
-{"orientation":8,"duration":2544,"width":360,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
+{"orientation":8,"duration":2544,"width":360,"height":480,"fps":30,"has_video":true,"has_audio":true}
--- a/testdata/golden/result/fit-in/filters%3Aformat%28webp%29/alpha-webm.webm
+++ b/testdata/golden/result/fit-in/filters%3Aformat%28webp%29/alpha-webm.webm
--- a/testdata/golden/result/meta/everybody-betray-me.mkv
+++ b/testdata/golden/result/meta/everybody-betray-me.mkv
@ -1 +1 @@
-{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
+{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"has_video":true,"has_audio":true}
--- a/testdata/golden/result/meta/filters%3Aprocess_frames%28%29/everybody-betray-me.mkv
+++ b/testdata/golden/result/meta/filters%3Aprocess_frames%28%29/everybody-betray-me.mkv
@ -0,0 +1 @@
+{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true}
				`@ -0,0 +1 @@`
				`{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true}`