feat: seek() filter and ffmpeg.SeekPosition, ffmpeg.SeekDuration

* feat: seek() filter and ffmpeg.SeekPosition, ffmpeg.SeekDuration
This commit is contained in:
Adrian Shum 2022-10-23 14:47:09 +08:00 committed by GitHub
parent 7350619928
commit 38b1b200ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 60 additions and 20 deletions

View file

@ -41,12 +41,34 @@ imagorvideo then converts the selected frame to RGB image data, forwards to the
imagorvideo supports the following filters, which can be used in conjunction with [imagor filters](https://github.com/cshum/imagor#filters): imagorvideo supports the following filters, which can be used in conjunction with [imagor filters](https://github.com/cshum/imagor#filters):
- `frame(n)` specifying the time position, duration or frame index for imaging, which skips the default automatic selection: - `frame(n)` specify the position or time duration for imaging, which skips the automatic best frame selection:
- Float between `0.0` and `1.0` indicates the position within the video. Example `frame(0.5)`, `frame(1.0)` - Float between `0.0` and `1.0` indicates the position within the video. Example `frame(0.5)`, `frame(1.0)`
- Time duration indicates the elapsed time since the start of the video. Example `frame(5m1s)`, `frame(200s)` - Time duration indicates the elapsed time since the start of the video. Example `frame(5m1s)`, `frame(200s)`
- Number starts from 1 indices frame index, example `frame(1)`, `frame(10)` - `seek(n)` seeks to the approximate position or time duration, then perform automatic best frame selection around that point:
- Float between `0.0` and `1.0` indicates the position within the video. Example `seek(0.5)`
- Time duration indicates the elapsed time since the start of the video. Example `seek(5m1s)`, `seek(200s)`
- `max_frames(n)` restrict the maximum number of frames allocated for image selection. The smaller the number, the faster the processing time. - `max_frames(n)` restrict the maximum number of frames allocated for image selection. The smaller the number, the faster the processing time.
#### `frame(n)` vs `seek(n)`
There are differences between the two that may lead you to choose one over the other.
`frame(n)` gives you the precise frame at the specified time. However, the precise frame may not be the best choice in some circumstances. Consider this video:
```
http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4
```
Retrieving the frame at 5 minutes elapsed time of this video:
```
http://localhost:8000/unsafe/300x0/filters:frame(5m)/http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/BigBuckBunny.mp4
```
The result is a completely black frame.
![black](https://raw.githubusercontent.com/cshum/imagorvideo/master/testdata/black.jpg)
This is where `seek(n)` comes in handy. It seeks to the key frame before the 5-minute mark, then performs best frame selection starting from that point using root-mean-square error (RMSE).
The result is a reasonable image that sits close to the specified time:
![seek 5m](https://raw.githubusercontent.com/cshum/imagorvideo/master/testdata/seek5m.jpg)
### Metadata ### Metadata
imagorvideo provides metadata endpoint that extracts video metadata, including dimension, duration and FPS data. It processes header only, without extracting the frame data for better processing speed. imagorvideo provides metadata endpoint that extracts video metadata, including dimension, duration and FPS data. It processes header only, without extracting the frame data for better processing speed.

View file

@ -107,13 +107,21 @@ func (av *AVContext) SelectPosition(f float64) (err error) {
func (av *AVContext) SelectDuration(ts time.Duration) (err error) { func (av *AVContext) SelectDuration(ts time.Duration) (err error) {
if ts > 0 { if ts > 0 {
av.selectedDuration = ts av.selectedDuration = ts
if err = seekDuration(av, ts); err != nil { if err = av.SeekDuration(ts); err != nil {
return return
} }
} }
return av.ProcessFrames(-1) return av.ProcessFrames(-1)
} }
// SeekPosition seeks to the point of the video identified by the position f.
//
// f is translated into a time offset by positionToDuration and the seek is
// then delegated to SeekDuration; that call's error is returned unchanged.
// The seek() filter documents f as a float between 0.0 and 1.0.
func (av *AVContext) SeekPosition(f float64) error {
	offset := av.positionToDuration(f)
	return av.SeekDuration(offset)
}
// SeekDuration seeks to the time offset ts by calling seekDuration on this
// context and returns whatever error that call reports.
//
// Unlike SelectDuration, it neither records ts as the selected duration nor
// processes frames afterwards.
func (av *AVContext) SeekDuration(ts time.Duration) error {
	seekErr := seekDuration(av, ts)
	return seekErr
}
func (av *AVContext) Export(bands int) (buf []byte, err error) { func (av *AVContext) Export(bands int) (buf []byte, err error) {
if err = av.ProcessFrames(-1); err != nil { if err = av.ProcessFrames(-1); err != nil {
return return

View file

@ -67,17 +67,6 @@ func TestAVContext(t *testing.T) {
stats, err := os.Stat(path) stats, err := os.Stat(path)
require.NoError(t, err) require.NoError(t, err)
av, err := LoadAVContext(reader, stats.Size()) av, err := LoadAVContext(reader, stats.Size())
require.NoError(t, err)
defer av.Close()
if n == 10 {
require.NoError(t, av.ProcessFrames(n))
} else if n == 99999 {
require.NoError(t, av.SelectDuration(time.Second))
} else if n == 9999 {
require.NoError(t, av.SelectPosition(0.7))
} else if n > -1 {
require.NoError(t, av.SelectFrame(n))
}
meta := av.Metadata() meta := av.Metadata()
metaBuf, err := json.Marshal(meta) metaBuf, err := json.Marshal(meta)
require.NoError(t, err) require.NoError(t, err)
@ -87,6 +76,19 @@ func TestAVContext(t *testing.T) {
} else { } else {
require.NoError(t, os.WriteFile(goldenFile, metaBuf, 0666)) require.NoError(t, os.WriteFile(goldenFile, metaBuf, 0666))
} }
require.NoError(t, err)
defer av.Close()
if n == 10 {
require.NoError(t, av.ProcessFrames(n))
} else if n == 99999 {
require.NoError(t, av.SelectDuration(time.Second))
} else if n == 9999 {
require.NoError(t, av.SelectPosition(0.7))
} else if n == 5 {
require.NoError(t, av.SelectFrame(n))
} else {
require.NoError(t, av.SeekPosition(0.7))
}
bands := 4 bands := 4
if n == 99999 { if n == 99999 {
bands = 999 bands = 999

View file

@ -136,6 +136,16 @@ func (p *Processor) Process(ctx context.Context, in *imagor.Blob, params imagorp
} }
} }
} }
case "seek":
if ts, e := time.ParseDuration(filter.Args); e == nil {
if err = av.SeekDuration(ts); err != nil {
return
}
} else if f, e := strconv.ParseFloat(filter.Args, 64); e == nil {
if err = av.SeekPosition(f); err != nil {
return
}
}
case "max_frames": case "max_frames":
n, _ := strconv.Atoi(filter.Args) n, _ := strconv.Atoi(filter.Args)
if err = av.ProcessFrames(n); err != nil { if err = av.ProcessFrames(n); err != nil {

View file

@ -32,7 +32,6 @@ type test struct {
name string name string
path string path string
expectCode int expectCode int
sizeOnly bool
} }
func TestProcessor(t *testing.T) { func TestProcessor(t *testing.T) {
@ -55,8 +54,10 @@ func TestProcessor(t *testing.T) {
{name: "mp4 orient 270", path: "200x100/schizo_270.mp4"}, {name: "mp4 orient 270", path: "200x100/schizo_270.mp4"},
{name: "image", path: "fit-in/100x100/demo.png"}, {name: "image", path: "fit-in/100x100/demo.png"},
{name: "alpha", path: "fit-in/filters:format(png)/alpha-webm.webm"}, {name: "alpha", path: "fit-in/filters:format(png)/alpha-webm.webm"},
{name: "alpha frame duration", path: "500x/filters:frame(5s):format(png)/alpha-webm.webm", sizeOnly: true}, {name: "alpha frame duration", path: "500x/filters:frame(5s):format(png)/alpha-webm.webm"},
{name: "alpha frame position", path: "500x/filters:frame(0.5):format(png)/alpha-webm.webm", sizeOnly: true}, {name: "alpha frame position", path: "500x/filters:frame(0.5):format(png)/alpha-webm.webm"},
{name: "alpha seek duration", path: "500x/filters:seek(5s):format(png)/alpha-webm.webm"},
{name: "alpha seek position", path: "500x/filters:seek(0.5):format(png)/alpha-webm.webm"},
{name: "corrupted", path: "fit-in/100x100/corrupt/everybody-betray-me.mkv", expectCode: 406}, {name: "corrupted", path: "fit-in/100x100/corrupt/everybody-betray-me.mkv", expectCode: 406},
{name: "no cover meta", path: "meta/no_cover.mp3"}, {name: "no cover meta", path: "meta/no_cover.mp3"},
{name: "no cover 406", path: "fit-in/100x100/no_cover.mp3", expectCode: 406}, {name: "no cover 406", path: "fit-in/100x100/no_cover.mp3", expectCode: 406},
@ -108,9 +109,6 @@ func doGoldenTests(t *testing.T, resultDir string, tests []test, opts ...Option)
assert.NoError(t, app.Shutdown(context.Background())) assert.NoError(t, app.Shutdown(context.Background()))
}) })
for _, tt := range tests { for _, tt := range tests {
if i == 1 && tt.sizeOnly {
continue
}
t.Run(fmt.Sprintf("%s-%d", tt.name, i+1), func(t *testing.T) { t.Run(fmt.Sprintf("%s-%d", tt.name, i+1), func(t *testing.T) {
w := httptest.NewRecorder() w := httptest.NewRecorder()
ctx, cancel := context.WithCancel(context.Background()) ctx, cancel := context.WithCancel(context.Background())

BIN
testdata/black.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 853 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 37 KiB

After

Width:  |  Height:  |  Size: 17 KiB

Before After
Before After

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

After

Width:  |  Height:  |  Size: 19 KiB

Before After
Before After

Binary file not shown.

After

Width:  |  Height:  |  Size: 195 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 330 KiB

BIN
testdata/seek5m.jpg vendored Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 7.2 KiB