refactor(ffmpeg): refactor process frames

* refactor best frame process

* refactor best frame process, remove alpha

* move thumb context to export

* process_frames() filter

* test: update golden files

* process_frames() filter

* test: update golden files

* tweak fps

* test: update golden files

* cleanup
This commit is contained in:
Adrian Shum 2022-10-13 15:23:23 +08:00 committed by GitHub
parent c86b39430d
commit 4d71837fb8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 127 additions and 122 deletions

View file

@ -294,8 +294,6 @@ ThumbContext *create_thumb_context(AVStream *stream, AVFrame *frame) {
}
int frames_in_128mb = (1 << 30) / (av_get_bits_per_pixel(thumb_ctx->desc) * frame->height * frame->width);
thumb_ctx->max_frames = FFMIN(nb_frames, frames_in_128mb);
// thumb_ctx->hist_size = 0;
// thumb_ctx->alpha = 0;
int i;
for (i = 0; i < thumb_ctx->desc->nb_components; i++) {
thumb_ctx->hist_size += 1 << thumb_ctx->desc->comp[i].depth;
@ -353,19 +351,6 @@ static double root_mean_square_error(const int *hist, const double *median, size
return sum_sq_err;
}
/*
 * Decide whether a frame should be treated as carrying transparency.
 *
 * frame          frame to inspect; data[1], width and height are read.
 * flags          pixel-format descriptor flags (AV_PIX_FMT_FLAG_*).
 * last_hist_num  a histogram count compared against width * height; the
 *                caller in this file passes the last histogram bin.
 *
 * Returns 1 when transparency is detected, 0 otherwise.
 *
 * For AV_PIX_FMT_FLAG_PAL formats every 4th byte of data[1], starting at
 * offset 3 and ending at offset 1023, is checked against 255.
 * NOTE(review): presumably these are the alpha bytes of a 256-entry,
 * 4-bytes-per-entry palette -- confirm byte order on big-endian targets.
 *
 * For non-palette formats with AV_PIX_FMT_FLAG_ALPHA, the frame is reported
 * as transparent when last_hist_num is smaller than the pixel count.
 */
static int alpha_check(const AVFrame *frame, const uint64_t flags, const int last_hist_num) {
    if (flags & AV_PIX_FMT_FLAG_PAL) {
        const uint8_t *palette = frame->data[1];
        int offset = 3;
        while (offset <= 1023) {
            if (palette[offset] != 255) {
                return 1;
            }
            offset += 4;
        }
        /* Palette formats never fall through to the histogram test. */
        return 0;
    }
    if (!(flags & AV_PIX_FMT_FLAG_ALPHA)) {
        return 0;
    }
    return (last_hist_num < frame->width * frame->height) ? 1 : 0;
}
void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) {
const AVPixFmtDescriptor *desc = thumb_ctx->desc;
thumb_ctx->frames[n].frame = frame;
@ -420,10 +405,17 @@ void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame) {
}
}
static AVFrame *get_best_frame(ThumbContext *thumb_ctx) {
int find_best_frame_index(ThumbContext *thumb_ctx) {
int i, j, n = 0, m = thumb_ctx->n, *hist = NULL;
double *median = thumb_ctx->median;
for (j = 0; j < m; j++) {
hist = thumb_ctx->frames[j].hist;
for (i = 0; i < thumb_ctx->hist_size; i++) {
median[i] += (double) hist[i] / m;
}
}
struct thumb_frame *t_frame = NULL;
double min_sum_sq_err = DBL_MAX, sum_sq_err = 0;
int i, n = 0;
for (i = 0; i < thumb_ctx->n; i++) {
t_frame = thumb_ctx->frames + i;
sum_sq_err = root_mean_square_error(t_frame->hist, thumb_ctx->median, thumb_ctx->hist_size);
@ -432,21 +424,9 @@ static AVFrame *get_best_frame(ThumbContext *thumb_ctx) {
n = i;
}
}
thumb_ctx->alpha = alpha_check(
thumb_ctx->frames[n].frame,
thumb_ctx->desc->flags,
thumb_ctx->frames[n].hist[thumb_ctx->hist_size - 1]);
return thumb_ctx->frames[n].frame;
return n;
}
AVFrame *process_frames(ThumbContext *thumb_ctx) {
int i, j, *hist = NULL, n = thumb_ctx->n;
double *median = thumb_ctx->median;
for (j = 0; j < n; j++) {
hist = thumb_ctx->frames[j].hist;
for (i = 0; i < thumb_ctx->hist_size; i++) {
median[i] += (double) hist[i] / n;
}
}
return get_best_frame(thumb_ctx);
/*
 * Return the AVFrame stored in slot n of thumb_ctx->frames.
 *
 * No bounds or NULL checking is performed here.
 * NOTE(review): callers are presumably expected to pass an index obtained
 * from find_best_frame_index() -- confirm.
 */
AVFrame *select_frame(ThumbContext *thumb_ctx, int n) {
    struct thumb_frame *slot = thumb_ctx->frames + n;
    return slot->frame;
}

View file

@ -9,6 +9,7 @@ import (
"context"
"github.com/cshum/imagor/vips/pointer"
"io"
"math"
"time"
"unsafe"
)
@ -31,39 +32,37 @@ type Metadata struct {
FPS int `json:"fps,omitempty"`
HasVideo bool `json:"has_video"`
HasAudio bool `json:"has_audio"`
HasAlpha bool `json:"has_alpha"`
}
type AVContext struct {
context context.Context
opaque unsafe.Pointer
reader io.Reader
seeker io.Seeker
formatContext *C.AVFormatContext
stream *C.AVStream
codecContext *C.AVCodecContext
thumbContext *C.ThumbContext
selectedFrame *C.AVFrame
outputFrame *C.AVFrame
durationInFormat bool
context context.Context
opaque unsafe.Pointer
reader io.Reader
seeker io.Seeker
formatContext *C.AVFormatContext
stream *C.AVStream
codecContext *C.AVCodecContext
thumbContext *C.ThumbContext
selectedIndex C.int
frame *C.AVFrame
durationInFormat bool
orientation int
size int64
duration time.Duration
frameAt int
indexAt C.int
durationAt time.Duration
width, height int
title, artist string
hasVideo, hasAudio bool
hasAlpha bool
closed bool
}
func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContext, error) {
av := &AVContext{
context: ctx,
reader: reader,
size: size,
context: ctx,
reader: reader,
size: size,
selectedIndex: -1,
}
if seeker, ok := reader.(io.Seeker); ok {
av.seeker = seeker
@ -81,36 +80,30 @@ func LoadAVContext(ctx context.Context, reader io.Reader, size int64) (*AVContex
if err := createDecoder(av); err != nil {
return av, err
}
if err := createThumbContext(av); err != nil {
return av, err
}
if err := convertFrameToRGB(av); err != nil {
return av, err
}
return av, nil
}
func closeAVContext(av *AVContext) {
if !av.closed {
if av.outputFrame != nil {
C.av_frame_free(&av.outputFrame)
}
if av.thumbContext != nil {
C.free_thumb_context(av.thumbContext)
av.selectedFrame = nil
}
if av.codecContext != nil {
C.avcodec_free_context(&av.codecContext)
}
if av.formatContext != nil {
C.free_format_context(av.formatContext)
}
pointer.Unref(av.opaque)
// ProcessFrames makes sure the thumbnail context exists.
//
// When av.thumbContext is already set this is a no-op that returns nil;
// otherwise it delegates to createThumbContext and returns its error.
func (av *AVContext) ProcessFrames() (err error) {
	if av.thumbContext != nil {
		return nil
	}
	return createThumbContext(av)
}
func (av *AVContext) Export() (buf []byte, err error) {
return exportBuffer(av)
// Export returns the raw pixel buffer of the selected frame.
//
// bands is the number of channels requested: 3 or 4. Any other value is
// treated as 3. The method runs ProcessFrames first, picks the best frame
// index if none has been selected yet (selectedIndex < 0), converts that
// frame via convertFrameToRGB and finally copies it out with exportBuffer.
//
// On failure of ProcessFrames or convertFrameToRGB a nil buffer is returned
// together with the error.
func (av *AVContext) Export(bands int) (buf []byte, err error) {
	err = av.ProcessFrames()
	if err != nil {
		return nil, err
	}
	if av.selectedIndex < 0 {
		findBestFrameIndex(av)
	}
	switch bands {
	case 3, 4:
		// supported as-is
	default:
		bands = 3
	}
	err = convertFrameToRGB(av, bands)
	if err != nil {
		return nil, err
	}
	return exportBuffer(av, bands)
}
func (av *AVContext) Close() {
@ -120,7 +113,7 @@ func (av *AVContext) Close() {
func (av *AVContext) Metadata() *Metadata {
var fps float64
if av.durationAt > 0 {
fps = float64(av.frameAt) * float64(time.Second) / float64(av.durationAt)
fps = float64(av.indexAt) * float64(time.Second) / float64(av.durationAt)
}
return &Metadata{
Orientation: av.orientation,
@ -129,10 +122,28 @@ func (av *AVContext) Metadata() *Metadata {
Height: av.height,
Title: av.title,
Artist: av.artist,
FPS: int(fps),
FPS: int(math.Round(fps)),
HasVideo: av.hasVideo,
HasAudio: av.hasAudio,
HasAlpha: av.hasAlpha,
}
}
// closeAVContext releases the native resources referenced by av and marks it
// closed. Calling it again on an already closed context does nothing.
//
// Release order: converted output frame, thumbnail context, codec context,
// format context, then the opaque pointer registration.
func closeAVContext(av *AVContext) {
	if av.closed {
		return
	}
	if av.frame != nil {
		C.av_frame_free(&av.frame)
	}
	if av.thumbContext != nil {
		C.free_thumb_context(av.thumbContext)
	}
	if av.codecContext != nil {
		C.avcodec_free_context(&av.codecContext)
	}
	if av.formatContext != nil {
		C.free_format_context(av.formatContext)
	}
	pointer.Unref(av.opaque)
	av.closed = true
}
@ -147,7 +158,7 @@ func createFormatContext(av *AVContext, callbackFlags C.int) error {
pointer.Unref(av.opaque)
return avError(intErr)
}
metaData(av)
metadata(av)
duration(av)
err := findStreams(av)
if err != nil {
@ -157,7 +168,7 @@ func createFormatContext(av *AVContext, callbackFlags C.int) error {
return err
}
func metaData(av *AVContext) {
func metadata(av *AVContext) {
var artist, title *C.char
C.get_metadata(av.formatContext, &artist, &title)
av.artist = C.GoString(artist)
@ -195,8 +206,8 @@ func createDecoder(av *AVContext) error {
return nil
}
func incrementDuration(av *AVContext, frame *C.AVFrame, i int) {
av.frameAt = i
func incrementDuration(av *AVContext, frame *C.AVFrame, i C.int) {
av.indexAt = i
if frame.pts != C.AV_NOPTS_VALUE {
ptsToNano := C.int64_t(1000000000 * av.stream.time_base.num / av.stream.time_base.den)
newDuration := time.Duration(frame.pts * ptsToNano)
@ -260,7 +271,7 @@ func populateThumbContext(av *AVContext, frames chan *C.AVFrame, done <-chan str
if err < 0 {
break
}
incrementDuration(av, frame, int(i))
incrementDuration(av, frame, i)
frames <- frame
frame = nil
}
@ -275,32 +286,31 @@ func populateThumbContext(av *AVContext, frames chan *C.AVFrame, done <-chan str
if err != 0 && err != C.int(ErrEOF) {
return avError(err)
}
av.selectedFrame = C.process_frames(av.thumbContext)
if av.selectedFrame == nil {
return ErrNoMem
}
av.hasAlpha = av.thumbContext.alpha != 0
return nil
}
func convertFrameToRGB(av *AVContext) error {
av.outputFrame = C.convert_frame_to_rgb(av.selectedFrame, av.thumbContext.alpha)
if av.outputFrame == nil {
// findBestFrameIndex asks the C side (find_best_frame_index) for the index
// of the best frame in av.thumbContext and records it in av.selectedIndex.
func findBestFrameIndex(av *AVContext) {
	best := C.find_best_frame_index(av.thumbContext)
	av.selectedIndex = best
}
// convertFrameToRGB converts the currently selected thumbnail frame with
// convert_frame_to_rgb and stores the result in av.frame.
//
// bands == 4 requests alpha (alpha flag 1); any other value requests none.
//
// Returns ErrInvalidData when there is no thumbnail context or no frame index
// has been selected, and ErrNoMem when the selected frame is missing or the
// conversion yields no frame. On a failed conversion av.frame is left nil.
func convertFrameToRGB(av *AVContext, bands int) error {
	// select_frame indexes the frame array without any checks, so refuse
	// to call it with a nil context or a negative index.
	if av.thumbContext == nil || av.selectedIndex < 0 {
		return ErrInvalidData
	}
	selected := C.select_frame(av.thumbContext, av.selectedIndex)
	if selected == nil {
		// Same error the pre-refactor code reported for a missing frame.
		return ErrNoMem
	}
	var alpha C.int
	if bands == 4 {
		alpha = 1
	}
	previous := av.frame
	av.frame = C.convert_frame_to_rgb(selected, alpha)
	// A repeated conversion must not leak the frame produced earlier.
	// NOTE(review): assumes convert_frame_to_rgb hands back a frame owned by
	// av (closeAVContext frees av.frame separately) -- confirm. The identity
	// check avoids freeing the frame that was just stored.
	if previous != nil && previous != av.frame {
		C.av_frame_free(&previous)
	}
	if av.frame == nil {
		return ErrNoMem
	}
	return nil
}
func exportBuffer(av *AVContext) ([]byte, error) {
if av.outputFrame == nil {
func exportBuffer(av *AVContext, bands int) ([]byte, error) {
if av.frame == nil {
return nil, ErrInvalidData
}
size := av.height * av.width
if av.hasAlpha {
size *= 4
} else {
size *= 3
}
buf := C.GoBytes(unsafe.Pointer(av.outputFrame.data[0]), C.int(size))
size := av.height * av.width * bands
buf := C.GoBytes(unsafe.Pointer(av.frame.data[0]), C.int(size))
return buf, nil
}

View file

@ -27,7 +27,7 @@ struct thumb_frame {
};
typedef struct ThumbContext {
int n, alpha, max_frames;
int n, max_frames;
struct thumb_frame *frames;
double *median;
const AVPixFmtDescriptor *desc;
@ -59,7 +59,9 @@ ThumbContext *create_thumb_context(AVStream *stream, AVFrame *frame);
void free_thumb_context(ThumbContext *thumb_ctx);
AVFrame *process_frames(ThumbContext *thumb_ctx);
int find_best_frame_index(ThumbContext *thumb_ctx);
AVFrame *select_frame(ThumbContext *thumb_ctx, int i);
void populate_histogram(ThumbContext *thumb_ctx, int n, AVFrame *frame);

View file

@ -57,7 +57,8 @@ func TestAVContext(t *testing.T) {
av, err := LoadAVContext(ctx, reader, stats.Size())
require.NoError(t, err)
defer av.Close()
err = av.ProcessFrames()
require.NoError(t, err)
meta := av.Metadata()
metaBuf, err := json.Marshal(meta)
require.NoError(t, err)
@ -67,13 +68,9 @@ func TestAVContext(t *testing.T) {
} else {
require.NoError(t, os.WriteFile(goldenFile, metaBuf, 0666))
}
buf, err := av.Export()
bands := 4
buf, err := av.Export(bands)
require.NoError(t, err)
bands := 3
if meta.HasAlpha {
bands = 4
}
img, err := vips.LoadImageFromMemory(buf, meta.Width, meta.Height, bands)
require.NoError(t, err)
buf, err = img.ExportJpeg(nil)

View file

@ -112,6 +112,23 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
return
}
defer av.Close()
bands := 3
for _, filter := range params.Filters {
switch filter.Name {
case "format":
if s := strings.ToLower(filter.Args); s == "webp" || s == "png" {
switch mime.Extension() {
case ".webm", ".flv", ".mov", ".avi":
bands = 4
}
break
}
case "process_frames":
if err = av.ProcessFrames(); err != nil {
return
}
}
}
meta := av.Metadata()
if params.Meta {
out = imagor.NewBlobFromJsonMarshal(Metadata{
@ -129,17 +146,13 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
case 8:
filters = append(filters, imagorpath.Filter{Name: "orient", Args: "90"})
}
buf, err := av.Export()
buf, err := av.Export(bands)
if err != nil || len(buf) == 0 {
if err == nil {
err = imagor.ErrUnsupportedFormat
}
return
}
bands := 3
if meta.HasAlpha {
bands = 4
}
out = imagor.NewBlobFromMemory(buf, meta.Width, meta.Height, bands)
if len(filters) > 0 {

View file

@ -42,12 +42,14 @@ func TestProcessor(t *testing.T) {
})
doGoldenTests(t, filepath.Join(testDataDir, "golden/result"), []test{
{name: "mkv", path: "fit-in/100x100/everybody-betray-me.mkv"},
{name: "mkv meta process_frames", path: "meta/filters:process_frames()/everybody-betray-me.mkv"},
{name: "mkv meta", path: "meta/everybody-betray-me.mkv"},
{name: "mp4", path: "200x100/schizo_0.mp4"},
{name: "mp4 orient 90", path: "220x100/schizo_90.mp4"},
{name: "mp4 orient 180", path: "200x100/schizo_180.mp4"},
{name: "mp4 orient 270", path: "200x100/schizo_270.mp4"},
{name: "image", path: "fit-in/100x100/demo.png"},
{name: "alpha", path: "fit-in/filters:format(webp)/alpha-webm.webm"},
{name: "corrupted", path: "fit-in/100x100/corrupt/everybody-betray-me.mkv", expectCode: 406},
}, WithDebug(true), WithLogger(zap.NewExample()))
doGoldenTests(t, filepath.Join(testDataDir, "golden/result-fallback-image"), []test{

View file

@ -1 +1 @@
{"orientation":1,"duration":12040,"width":720,"height":576,"fps":25,"has_video":true,"has_audio":false,"has_alpha":true}
{"orientation":1,"duration":12040,"width":720,"height":576,"fps":25,"has_video":true,"has_audio":false}

View file

@ -1 +1 @@
{"orientation":1,"duration":7407,"width":640,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
{"orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":1,"duration":3925,"width":492,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
{"orientation":1,"duration":3925,"width":492,"height":360,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":1,"duration":2560,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
{"orientation":1,"duration":2560,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":1,"duration":2544,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
{"orientation":1,"duration":2544,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":3,"duration":2544,"width":480,"height":360,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
{"orientation":3,"duration":2544,"width":480,"height":360,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":6,"duration":2544,"width":360,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
{"orientation":6,"duration":2544,"width":360,"height":480,"fps":30,"has_video":true,"has_audio":true}

View file

@ -1 +1 @@
{"orientation":8,"duration":2544,"width":360,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
{"orientation":8,"duration":2544,"width":360,"height":480,"fps":30,"has_video":true,"has_audio":true}

Binary file not shown.

After

Width:  |  Height:  |  Size: 114 KiB

View file

@ -1 +1 @@
{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":29,"has_video":true,"has_audio":true,"has_alpha":false}
{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"has_video":true,"has_audio":true}

View file

@ -0,0 +1 @@
{"format":"mkv","content_type":"video/x-matroska","orientation":1,"duration":7407,"width":640,"height":480,"fps":30,"has_video":true,"has_audio":true}