refactor(ffmpeg): refactor process frames

* refactor best frame process

* refactor best frame process, remove alpha

* move thumb context to export

* process_frames() filter

* test: update golden files

* process_frames() filter

* test: update golden files

* tweak fps

* test: update golden files

* cleanup
This commit is contained in:
Adrian Shum 2022-10-13 15:23:23 +08:00 committed by GitHub
parent c86b39430d
commit 4d71837fb8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 127 additions and 122 deletions

View file

@ -294,8 +294,6 @@ ThumbContext *create_thumb_context(AVStream *stream, AVFrame *frame) {
} }
int frames_in_128mb = (1 << 30) / (av_get_bits_per_pixel(thumb_ctx->desc) * frame->height * frame->width); int frames_in_128mb = (1 << 30) / (av_get_bits_per_pixel(thumb_ctx->desc) * frame->height * frame->width);
thumb_ctx->max_frames = FFMIN(nb_frames, frames_in_128mb); thumb_ctx->max_frames = FFMIN(nb_frames, frames_in_128mb);
// thumb_ctx->hist_size = 0;
// thumb_ctx->alpha = 0;
int i; int i;
for (i = 0; i < thumb_ctx->desc->nb_components; i++) { for (i = 0; i < thumb_ctx->desc->nb_components; i++) {
thumb_ctx->hist_size += 1 << thumb_ctx->desc->comp[i].depth; thumb_ctx->hist_size += 1 << thumb_ctx->desc->comp[i].depth;
@ -353,19 +351,6 @@ static double root_mean_square_error(const int *hist, const double *median, size
return sum_sq_err; return sum_sq_err;
} }
static int alpha_check(const AVFrame *frame, const uint64_t flags, const int last_hist_num) {
if (flags & AV_PIX_FMT_FLAG_PAL) {
for (int i = 3; i <= 1023; i += 4) {
if (frame->data[1][i] != 255) {
return 1;
}
}
} else if (flags & AV_PIX_FMT_FLAG_ALPHA && last_hist_num < frame->width * frame->height) {
return 1;
}
return 0;
}
void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) { void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) {
const AVPixFmtDescriptor *desc = thumb_ctx->desc; const AVPixFmtDescriptor *desc = thumb_ctx->desc;
thumb_ctx->frames[n].frame = frame; thumb_ctx->frames[n].frame = frame;
@ -420,10 +405,17 @@ void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) {
} }
} }
static AVFrame *get_best_frame(ThumbContext *thumb_ctx) { int find_best_frame_index(ThumbContext *thumb_ctx) {
int i, j, n = 0, m = thumb_ctx->n, *hist = NULL;
double *median = thumb_ctx->median;
for (j = 0; j < m; j++) {
hist = thumb_ctx->frames[j].hist;
for (i = 0; i < thumb_ctx->hist_size; i++) {
median[i] += (double) hist[i] / m;
}
}
struct thumb_frame *t_frame = NULL; struct thumb_frame *t_frame = NULL;
double min_sum_sq_err = DBL_MAX, sum_sq_err = 0; double min_sum_sq_err = DBL_MAX, sum_sq_err = 0;
int i, n = 0;
for (i = 0; i < thumb_ctx->n; i++) { for (i = 0; i < thumb_ctx->n; i++) {
t_frame = thumb_ctx->frames + i; t_frame = thumb_ctx->frames + i;
sum_sq_err = root_mean_square_error(t_frame->hist, thumb_ctx->median, thumb_ctx->hist_size); sum_sq_err = root_mean_square_error(t_frame->hist, thumb_ctx->median, thumb_ctx->hist_size);
@ -432,21 +424,9 @@ static AVFrame *get_best_frame(ThumbContext *thumb_ctx) {
n = i; n = i;
} }
} }
thumb_ctx->alpha = alpha_check( return n;
thumb_ctx->frames[n].frame,
thumb_ctx->desc->flags,
thumb_ctx->frames[n].hist[thumb_ctx->hist_size - 1]);
return thumb_ctx->frames[n].frame;
} }
AVFrame *process_frames(ThumbContext *thumb_ctx) { AVFrame *select_frame(ThumbContext *thumb_ctx, int n) {
int i, j, *hist = NULL, n = thumb_ctx->n; return thumb_ctx->frames[n].frame;
double *median = thumb_ctx->median;
for (j = 0; j < n; j++) {
hist = thumb_ctx->frames[j].hist;
for (i = 0; i < thumb_ctx->hist_size; i++) {
median[i] += (double) hist[i] / n;
}
}
return get_best_frame(thumb_ctx);
} }

View file

@ -9,6 +9,7 @@ import (
"context" "context"
"github.com/cshum/imagor/vips/pointer" "github.com/cshum/imagor/vips/pointer"
"io" "io"
"math"
"time" "time"
"unsafe" "unsafe"
) )
@ -31,39 +32,37 @@ type Metadata struct {
FPS int `json:"fps,omitempty"` FPS int `json:"fps,omitempty"`
HasVideo bool `json:"has_video"` HasVideo bool `json:"has_video"`
HasAudio bool `json:"has_audio"` HasAudio bool `json:"has_audio"`
HasAlpha bool `json:"has_alpha"`
} }
type AVContext struct { type AVContext struct {
context context.Context context context.Context
opaque unsafe.Pointer opaque unsafe.Pointer
reader io.Reader reader io.Reader
seeker io.Seeker seeker io.Seeker
formatContext *C.AVFormatContext formatContext *C.AVFormatContext
stream *C.AVStream stream *C.AVStream
codecContext *C.AVCodecContext codecContext *C.AVCodecContext
thumbContext *C.ThumbContext thumbContext *C.ThumbContext
selectedFrame *C.AVFrame selectedIndex C.int
outputFrame *C.AVFrame frame *C.AVFrame
durationInFormat bool durationInFormat bool
orientation int orientation int
size int64 size int64
duration time.Duration duration time.Duration
frameAt int indexAt C.int
durationAt time.Duration durationAt time.Duration
width, height int width, height int
title, artist string title, artist string
hasVideo, hasAudio bool hasVideo, hasAudio bool
hasAlpha bool
closed bool closed bool
} }
func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContext, error) { func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContext, error) {
av := &AVContext{ av := &AVContext{
context: ctx, context: ctx,
reader: reader, reader: reader,
size: size, size: size,
selectedIndex: -1,
} }
if seeker, ok := reader.(io.Seeker); ok { if seeker, ok := reader.(io.Seeker); ok {
av.seeker = seeker av.seeker = seeker
@ -81,36 +80,30 @@ func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContex
if err := createDecoder(av); err != nil { if err := createDecoder(av); err != nil {
return av, err return av, err
} }
if err := createThumbContext(av); err != nil {
return av, err
}
if err := convertFrameToRGB(av); err != nil {
return av, err
}
return av, nil return av, nil
} }
func closeAVContext(av *AVContext) { func (av *AVContext) ProcessFrames() (err error) {
if !av.closed { if av.thumbContext == nil {
if av.outputFrame != nil { return createThumbContext(av)
C.av_frame_free(&av.outputFrame)
}
if av.thumbContext != nil {
C.free_thumb_context(av.thumbContext)
av.selectedFrame = nil
}
if av.codecContext != nil {
C.avcodec_free_context(&av.codecContext)
}
if av.formatContext != nil {
C.free_format_context(av.formatContext)
}
pointer.Unref(av.opaque)
} }
return
} }
func (av *AVContext) Export() (buf []byte, err error) { func (av *AVContext) Export(bands int) (buf []byte, err error) {
return exportBuffer(av) if err = av.ProcessFrames(); err != nil {
return
}
if av.selectedIndex < 0 {
findBestFrameIndex(av)
}
if bands < 3 || bands > 4 {
bands = 3
}
if err = convertFrameToRGB(av, bands); err != nil {
return
}
return exportBuffer(av, bands)
} }
func (av *AVContext) Close() { func (av *AVContext) Close() {
@ -120,7 +113,7 @@ func (av *AVContext) Close() {
func (av *AVContext) Metadata() *Metadata { func (av *AVContext) Metadata() *Metadata {
var fps float64 var fps float64
if av.durationAt > 0 { if av.durationAt > 0 {
fps = float64(av.frameAt) * float64(time.Second) / float64(av.durationAt) fps = float64(av.indexAt) * float64(time.Second) / float64(av.durationAt)
} }
return &Metadata{ return &Metadata{
Orientation: av.orientation, Orientation: av.orientation,
@ -129,10 +122,28 @@ func (av *AVContext) Metadata() *Metadata {
Height: av.height, Height: av.height,
Title: av.title, Title: av.title,
Artist: av.artist, Artist: av.artist,
FPS: int(fps), FPS: int(math.Round(fps)),
HasVideo: av.hasVideo, HasVideo: av.hasVideo,
HasAudio: av.hasAudio, HasAudio: av.hasAudio,
HasAlpha: av.hasAlpha, }
}
func closeAVContext(av *AVContext) {
if !av.closed {
if av.frame != nil {
C.av_frame_free(&av.frame)
}
if av.thumbContext != nil {
C.free_thumb_context(av.thumbContext)
}
if av.codecContext != nil {
C.avcodec_free_context(&av.codecContext)
}
if av.formatContext != nil {
C.free_format_context(av.formatContext)
}
pointer.Unref(av.opaque)
av.closed = true
} }
} }
@ -147,7 +158,7 @@ func createFormatContext(av *AVContext, callbackFlags C.int) error {
pointer.Unref(av.opaque) pointer.Unref(av.opaque)
return avError(intErr) return avError(intErr)
} }
metaData(av) metadata(av)
duration(av) duration(av)
err := findStreams(av) err := findStreams(av)
if err != nil { if err != nil {
@ -157,7 +168,7 @@ func createFormatContext(av *AVContext, callbackFlags C.int) error {
return err return err
} }
func metaData(av *AVContext) { func metadata(av *AVContext) {
var artist, title *C.char var artist, title *C.char
C.get_metadata(av.formatContext, &artist, &title) C.get_metadata(av.formatContext, &artist, &title)
av.artist = C.GoString(artist) av.artist = C.GoString(artist)
@ -195,8 +206,8 @@ func createDecoder(av *AVContext) error {
return nil return nil
} }
func incrementDuration(av *AVContext, frame *C.AVFrame, i int) { func incrementDuration(av *AVContext, frame *C.AVFrame, i C.int) {
av.frameAt = i av.indexAt = i
if frame.pts != C.AV_NOPTS_VALUE { if frame.pts != C.AV_NOPTS_VALUE {
ptsToNano := C.int64_t(1000000000 * av.stream.time_base.num / av.stream.time_base.den) ptsToNano := C.int64_t(1000000000 * av.stream.time_base.num / av.stream.time_base.den)
newDuration := time.Duration(frame.pts * ptsToNano) newDuration := time.Duration(frame.pts * ptsToNano)
@ -260,7 +271,7 @@ func populateThumbContext(av *AVContext, frames chan *C.AVFrame, done <-chan str
if err < 0 { if err < 0 {
break break
} }
incrementDuration(av, frame, int(i)) incrementDuration(av, frame, i)
frames <- frame frames <- frame
frame = nil frame = nil
} }
@ -275,32 +286,31 @@ func populateThumbContext(av *AVContext, frames chan *C.AVFrame, done <-chan str
if err != 0 && err != C.int(ErrEOF) { if err != 0 && err != C.int(ErrEOF) {
return avError(err) return avError(err)
} }
av.selectedFrame = C.process_frames(av.thumbContext)
if av.selectedFrame == nil {
return ErrNoMem
}
av.hasAlpha = av.thumbContext.alpha != 0
return nil return nil
} }
func convertFrameToRGB(av *AVContext) error { func findBestFrameIndex(av *AVContext) {
av.outputFrame = C.convert_frame_to_rgb(av.selectedFrame, av.thumbContext.alpha) av.selectedIndex = C.find_best_frame_index(av.thumbContext)
if av.outputFrame == nil { }
func convertFrameToRGB(av *AVContext, bands int) error {
var alpha int
if bands == 4 {
alpha = 1
}
av.frame = C.convert_frame_to_rgb(
C.select_frame(av.thumbContext, av.selectedIndex), C.int(alpha))
if av.frame == nil {
return ErrNoMem return ErrNoMem
} }
return nil return nil
} }
func exportBuffer(av *AVContext) ([]byte, error) { func exportBuffer(av *AVContext, bands int) ([]byte, error) {
if av.outputFrame == nil { if av.frame == nil {
return nil, ErrInvalidData return nil, ErrInvalidData
} }
size := av.height * av.width size := av.height * av.width * bands
if av.hasAlpha { buf := C.GoBytes(unsafe.Pointer(av.frame.data[0]), C.int(size))
size *= 4
} else {
size *= 3
}
buf := C.GoBytes(unsafe.Pointer(av.outputFrame.data[0]), C.int(size))
return buf, nil return buf, nil
} }

View file

@ -27,7 +27,7 @@ struct thumb_frame {
}; };
typedef struct ThumbContext { typedef struct ThumbContext {
int n, alpha, max_frames; int n, max_frames;
struct thumb_frame *frames; struct thumb_frame *frames;
double *median; double *median;
const AVPixFmtDescriptor *desc; const AVPixFmtDescriptor *desc;
@ -59,7 +59,9 @@ ThumbContext *create_thumb_context(AVStream *stream, AVFrame *frame);
void free_thumb_context(ThumbContext *thumb_ctx); void free_thumb_context(ThumbContext *thumb_ctx);
AVFrame *process_frames(ThumbContext *thumb_ctx); int find_best_frame_index(ThumbContext *thumb_ctx);
AVFrame *select_frame(ThumbContext *thumb_ctx, int i);
void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame); void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame);

View file

@ -57,7 +57,8 @@ func TestAVContext(t *testing.T) {
av, err := LoadAVContext(ctx, reader, stats.Size()) av, err := LoadAVContext(ctx, reader, stats.Size())
require.NoError(t, err) require.NoError(t, err)
defer av.Close() defer av.Close()
err = av.ProcessFrames()
require.NoError(t, err)
meta := av.Metadata() meta := av.Metadata()
metaBuf, err := json.Marshal(meta) metaBuf, err := json.Marshal(meta)
require.NoError(t, err) require.NoError(t, err)
@ -67,13 +68,9 @@ func TestAVContext(t *testing.T) {
} else { } else {
require.NoError(t, os.WriteFile(goldenFile, metaBuf, 0666)) require.NoError(t, os.WriteFile(goldenFile, metaBuf, 0666))
} }
bands := 4
buf, err := av.Export() buf, err := av.Export(bands)
require.NoError(t, err) require.NoError(t, err)
bands := 3
if meta.HasAlpha {
bands = 4
}
img, err := vips.LoadImageFromMemory(buf, meta.Width, meta.Height, bands) img, err := vips.LoadImageFromMemory(buf, meta.Width, meta.Height, bands)
require.NoError(t, err) require.NoError(t, err)
buf, err = img.ExportJpeg(nil) buf, err = img.ExportJpeg(nil)

View file

@ -112,6 +112,23 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
return return
} }
defer av.Close() defer av.Close()
bands := 3
for _, filter := range params.Filters {
switch filter.Name {
case "format":
if s := strings.ToLower(filter.Args); s == "webp" || s == "png" {
switch mime.Extension() {
case ".webm", ".flv", ".mov", ".avi":
bands = 4
}
break
}
case "process_frames":
if err = av.ProcessFrames(); err != nil {
return
}
}
}
meta := av.Metadata() meta := av.Metadata()
if params.Meta { if params.Meta {
out = imagor.NewBlobFromJsonMarshal(Metadata{ out = imagor.NewBlobFromJsonMarshal(Metadata{
@ -129,17 +146,13 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
case 8: case 8:
filters = append(filters, imagorpath.Filter{Name: "orient", Args: "90"}) filters = append(filters, imagorpath.Filter{Name: "orient", Args: "90"})
} }
buf, err := av.Export() buf, err := av.Export(bands)
if err != nil || len(buf) == 0 { if err != nil || len(buf) == 0 {
if err == nil { if err == nil {
err = imagor.ErrUnsupportedFormat err = imagor.ErrUnsupportedFormat
} }
return return
} }
bands := 3
if meta.HasAlpha {
bands = 4
}
out = imagor.NewBlobFromMemory(buf, meta.Width, meta.Height, bands) out = imagor.NewBlobFromMemory(buf, meta.Width, meta.Height, bands)
if len(filters) > 0 { if len(filters) > 0 {

View file

@ -42,12 +42,14 @@ func TestProcessor(t *testing.T) {
}) })
doGoldenTests(t, filepath.Join(testDataDir, "golden/result"), []test{ doGoldenTests(t, filepath.Join(testDataDir, "golden/result"), []test{
{name: "mkv", path: "fit-in/100x100/everybody-betray-me.mkv"}, {name: "mkv", path: "fit-in/100x100/everybody-betray-me.mkv"},
{name: "mkv meta process_frames", path: "meta/filters:process_frames()/everybody-betray-me.mkv"},
{name: "mkv meta", path: "meta/everybody-betray-me.mkv"}, {name: "mkv meta", path: "meta/everybody-betray-me.mkv"},
{name: "mp4", path: "200x100/schizo_0.mp4"}, {name: "mp4", path: "200x100/schizo_0.mp4"},
{name: "mp4 orient 90", path: "220x100/schizo_90.mp4"}, {name: "mp4 orient 90", path: "220x100/schizo_90.mp4"},
{name: "mp4 orient 180", path: "200x100/schizo_180.mp4"}, {name: "mp4 orient 180", path: "200x100/schizo_180.mp4"},
{name: "mp4 orient 270", path: "200x100/schizo_270.mp4"}, {name: "mp4 orient 270", path: "200x100/schizo_270.mp4"},
{name: "image", path: "fit-in/100x100/demo.png"}, {name: "image", path: "fit-in/100x100/demo.png"},
{name: "alpha", path: "fit-in/filters:format(webp)/alpha-webm.webm"},
{name: "corrupted", path: "fit-in/100x100/corrupt/everybody-betray-me.mkv", expectCode: 406}, {name: "corrupted", path: "fit-in/100x100/corrupt/everybody-betray-me.mkv", expectCode: 406},
}, WithDebug(true), WithLogger(zap.NewExample())) }, WithDebug(true), WithLogger(zap.NewExample()))
doGoldenTests(t, filepath.Join(testDataDir, "golden/result-fallback-image"), []test{ doGoldenTests(t, filepath.Join(testDataDir, "golden/result-fallback-image"), []test{

View file

@ -1 +1 @@
{"orientation":1,"duration":12040,"width":720,"height":576,"fps":25,"has_video":true,"has_audio":false,"has_alpha":true} {"orientation":1,"duration":12040,"width":720,"height":576,"fps":25,"has_video":true,"has_audio":false}

View file

@ -1 +1 @@
{"orientation":1,"duration":7407,"width":640,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} {"orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":1,"duration":3925,"width":492,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} {"orientation":1,"duration":3925,"width":492,"height":360,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":1,"duration":2560,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} {"orientation":1,"duration":2560,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":1,"duration":2544,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} {"orientation":1,"duration":2544,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":3,"duration":2544,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} {"orientation":3,"duration":2544,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":6,"duration":2544,"width":360,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} {"orientation":6,"duration":2544,"width":360,"height":480,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":8,"duration":2544,"width":360,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} {"orientation":8,"duration":2544,"width":360,"height":480,"fps":30,"has_video":true,"has_audio":true}

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

View file

@ -1 +1 @@
{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false} {"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"has_video":true,"has_audio":true}

View file

@ -0,0 +1 @@
{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true}