[FFmpeg-devel] [PATCH] Add example seeking_while_remuxing.c

Stefano Sabatini stefasab at gmail.com
Tue Feb 4 23:38:26 CET 2014


On date Thursday 2014-01-30 22:06:45 +0200, Andrey Utkin encoded:
> ---
>  configure                             |   2 +
>  doc/Makefile                          |   1 +
>  doc/examples/Makefile                 |   1 +
>  doc/examples/seeking_while_remuxing.c | 308 ++++++++++++++++++++++++++++++++++
>  4 files changed, 312 insertions(+)
>  create mode 100644 doc/examples/seeking_while_remuxing.c
> 
> diff --git a/configure b/configure
> index 8b88daf..76723fc 100755
> --- a/configure
> +++ b/configure
> @@ -1250,6 +1250,7 @@ EXAMPLE_LIST="
>      remuxing_example
>      resampling_audio_example
>      scaling_video_example
> +    seeking_while_remuxing_example
>      transcode_aac_example
>  "
>  
> @@ -2399,6 +2400,7 @@ muxing_example_deps="avcodec avformat avutil swscale"
>  remuxing_example_deps="avcodec avformat avutil"
>  resampling_audio_example_deps="avutil swresample"
>  scaling_video_example_deps="avutil swscale"
> +seeking_while_remuxing_example_deps="avcodec avformat avutil"
>  transcode_aac_example_deps="avcodec avformat swresample"
>  
>  # libraries
> diff --git a/doc/Makefile b/doc/Makefile
> index 4092f52..f75a401 100644
> --- a/doc/Makefile
> +++ b/doc/Makefile
> @@ -45,6 +45,7 @@ DOC_EXAMPLES-$(CONFIG_MUXING_EXAMPLE)            += muxing
>  DOC_EXAMPLES-$(CONFIG_REMUXING_EXAMPLE)          += remuxing
>  DOC_EXAMPLES-$(CONFIG_RESAMPLING_AUDIO_EXAMPLE)  += resampling_audio
>  DOC_EXAMPLES-$(CONFIG_SCALING_VIDEO_EXAMPLE)     += scaling_video
> +DOC_EXAMPLES-$(CONFIG_SEEKING_WHILE_REMUXING_EXAMPLE) += seeking_while_remuxing
>  DOC_EXAMPLES-$(CONFIG_TRANSCODE_AAC_EXAMPLE)     += transcode_aac
>  ALL_DOC_EXAMPLES_LIST = $(DOC_EXAMPLES-) $(DOC_EXAMPLES-yes)
>  
> diff --git a/doc/examples/Makefile b/doc/examples/Makefile
> index a25455e..52d9c52 100644
> --- a/doc/examples/Makefile
> +++ b/doc/examples/Makefile
> @@ -20,6 +20,7 @@ EXAMPLES=       decoding_encoding                  \
>                  remuxing                           \
>                  resampling_audio                   \
>                  scaling_video                      \
> +                seeking_while_remuxing             \
>                  transcode_aac                      \
>  
>  OBJS=$(addsuffix .o,$(EXAMPLES))
> diff --git a/doc/examples/seeking_while_remuxing.c b/doc/examples/seeking_while_remuxing.c
> new file mode 100644
> index 0000000..be9eb0e
> --- /dev/null
> +++ b/doc/examples/seeking_while_remuxing.c
> @@ -0,0 +1,308 @@
> +/*
> + * Copyright (c) 2013 Stefano Sabatini
> + * Copyright (c) 2014 Andrey Utkin
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +
> +/**
> + * @file
> + * libavformat/libavcodec demuxing, muxing and seeking API example.
> + *

> + * Remux input file to output file up to 'seekfrom' time position, then seeks
> + * to 'seekto' position and continues remuxing. Seek is performed only once
> + * (won't loop).

So it is basically skipping the interval between seekfrom and seekto?
Maybe this can be stated more clearly.

> + * @example doc/examples/seeking_while_remuxing.c
> + */
> +
> +#include <libavutil/timestamp.h>
> +#include <libavformat/avformat.h>
> +
> +#define YOU_WANT_NO_ERRORS_ABOUT_NON_MONOTONIC_TIMESTAMPS
> +
> +static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt, const char *tag)
> +{
> +    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
> +
> +    fprintf(stderr, "%s: pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
> +            tag,
> +            av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
> +            av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
> +            av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
> +            pkt->stream_index);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +    AVFormatContext *ifmt_ctx = NULL, *ofmt_ctx = NULL;
> +
> +    int64_t shift = 0; // Output timestamp shift caused by seek.
> +    // In microseconds, 10^-6 of second, which is AV_TIME_BASE_Q
> +
> +    int seek_done = 0;
> +    const char *in_filename, *out_filename, *out_format_name;
> +    int64_t seekfrom, seekto;
> +    int ret;
> +    unsigned int i;
> +

> +    if (argc != 6) {
> +        fprintf(stderr, "Usage: %s <input file> <output file> "
> +                "<output format, or empty for default> "
> +                "<seekfrom: time offset to activate seek, microseconds> "
> +                "<seekto: time offset to seek to, microseconds>\n", argv[0]);
> +        fprintf(stderr, "Remuxes input file to output file up to 'seekfrom' "
> +                "time position, then seeks to 'seekto' position and continues "
> +                "remuxing. Seek is performed only once (won't loop).\n");
> +        return 1;

Please keep same format as the other examples.

Usage: %s input_file output_file output_format seekfrom seekto.
API example program to show how to read frames and seek.
seekfrom and seekto are time offsets expressed in microseconds.
This program remuxes input_file to output_file up to 'seekfrom' time
position, then seeks to 'seekto' position and continues remuxing. Seek
is performed only once (won't loop).

Also input and output are probably better, since they are not
necessarily "files".

> +    }
> +
> +    in_filename = argv[1];
> +    out_filename = argv[2];
> +    out_format_name = argv[3];
> +

> +    ret = sscanf(argv[4], "%"PRId64, &seekfrom);
> +    if (ret != 1) {
> +        fprintf(stderr, "Invalid seekfrom %s\n", argv[4]);
> +        return 1;
> +    }
> +
> +    ret = sscanf(argv[5], "%"PRId64, &seekto);
> +    if (ret != 1) {
> +        fprintf(stderr, "Invalid seekto %s\n", argv[5]);
> +        return 1;
> +    }

Probably no more complex, and more generic: you could use
av_parse_time().

> +
> +    // Initialize libavformat
> +    av_register_all();

> +    avformat_network_init();

is this required? (because we lack it in other examples?)

> +
> +    // Open file, init input file context, read file's mediacontainer header.

media container?

> +    // Some file and elementary streams information is available after this
> +    if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) {
> +        fprintf(stderr, "Could not open input file '%s'", in_filename);
> +        goto end;
> +    }
> +
> +    // Reads some amount of file contents to get all information about elementary streams.
> +    // This can be not necessary is some cases, but in general case, this is needed step.

"this is a needed step" or better, "this step is needed".

> +    if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) {
> +        fprintf(stderr, "Failed to retrieve input stream information");
> +        goto end;
> +    }
> +
> +    // Dump input file and its elementary streams properties to stderr
> +    av_dump_format(ifmt_ctx, 0, in_filename, 0);
> +
> +    // Open output context, with specified mediacontainer type if given
> +    ret = avformat_alloc_output_context2(&ofmt_ctx, NULL,
> +            out_format_name[0] ? out_format_name : NULL, out_filename);
> +    if (ret < 0) {
> +        fprintf(stderr, "Failed to open output context by URL %s\n", out_filename);
> +        goto end;
> +    }
> +

> +    // Define for output file same elementary streams as in input file

Define same elementary streams for output file as in input file 

VERB OBJECT COMPLEMENTS for transitive verbs (I tend to make the same
mistake since my native language is not strict about word order).

> +    for (i = 0; i < ifmt_ctx->nb_streams; i++) {
> +        AVStream *in_stream = ifmt_ctx->streams[i];
> +        AVStream *out_stream = avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
> +        if (!out_stream) {
> +            fprintf(stderr, "Failed allocating elementary output stream\n");
> +            ret = AVERROR_UNKNOWN;
> +            goto end;
> +        }
> +
> +        ret = avcodec_copy_context(out_stream->codec, in_stream->codec);
> +        if (ret < 0) {
> +            fprintf(stderr, "Failed to copy elementary stream properties\n");
> +            goto end;
> +        }
> +        if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
> +            out_stream->codec->flags |= CODEC_FLAG_GLOBAL_HEADER;
> +    }
> +
> +    av_dump_format(ofmt_ctx, 0, out_filename, 1);
> +
> +    // Initializes actual output context on protocol, output device or file level
> +    ret = avio_open(&ofmt_ctx->pb, out_filename, AVIO_FLAG_WRITE);
> +    if (ret < 0) {
> +        fprintf(stderr, "Could not open output to '%s'", out_filename);
> +        goto end;
> +    }
> +
> +    // Last step of output initialization. Mediacontainer format "driver" is
> +    // initialized. This generally leads to writing header data to output file.
> +    ret = avformat_write_header(ofmt_ctx, NULL);
> +    if (ret < 0) {
> +        fprintf(stderr, "Error occurred when opening output file\n");
> +        goto end;
> +    }
> +
> +    // Copy input elementary streams to output at packed frames level.
> +    // This process is known as remuxing (remultiplexing). It consists of
> +    // demultiplexing (demuxing) streams from input and multiplexing (muxing)
> +    // to output.
> +    // No image/sound decoding takes place in this case.
> +    while (1) {
> +        AVPacket pkt;
> +        AVStream *in_stream, *out_stream;

> +        int64_t current_dts_mcs;

_mcs -> obfuscated variable name

> +
> +        ret = av_read_frame(ifmt_ctx, &pkt);
> +        if (ret < 0)
> +            break;
> +
> +        log_packet(ifmt_ctx, &pkt, "in");
> +
> +        if (pkt.dts == AV_NOPTS_VALUE || pkt.pts == AV_NOPTS_VALUE) {

> +            // TODO Decode to figure out timestamps? Anyway, decoding is out of
> +            // scope of this example currently.

Indeed

> +            //
> +            // Such packets happen to be keyframes in Matroska.
> +            // So dropping them adds up to lost data.

This seems too specific, and thus confusing, and should probably be dropped.

> +            // When they're remuxed at the beginning of stream, it's OK, but
> +            // av_interleaved_write_frame() raises non-monotonity error when
> +            // they're pushed after a seek (i.e. when there were
> +            // correctly-timestamped packets before)
> +            printf("Discarding packet not having timestamps\n");
> +            av_free_packet(&pkt);
> +            continue;
> +        }
> +
> +        in_stream  = ifmt_ctx->streams[pkt.stream_index];
> +        out_stream = ofmt_ctx->streams[pkt.stream_index];
> +
> +        current_dts_mcs = av_rescale_q (pkt.dts, in_stream->time_base, AV_TIME_BASE_Q);
> +
> +        // Check if it's time to seek
> +        if (!seek_done
> +            && current_dts_mcs >= seekfrom) {
> +            av_free_packet(&pkt);
> +            printf("Seeking. Last read packet is discarded\n");
> +            ret = av_seek_frame(ifmt_ctx, -1, seekto, 0);
> +            if (ret) {
> +                fprintf(stderr, "Seeking failed\n");
> +                break;
> +            }
> +            seek_done = 1;
> +            shift = seekfrom - seekto;
> +            continue;
> +        }
> +
> +#ifdef YOU_WANT_NO_ERRORS_ABOUT_NON_MONOTONIC_TIMESTAMPS
> +        if (seek_done && current_dts_mcs < seekto) {
> +            printf("Discarding packet having timestamp lower than needed\n");
> +            av_free_packet(&pkt);
> +            continue;
> +            // Citing official ffmpeg docs:
> +            // "Note the in most formats it is not possible to seek exactly, so
> +            // ffmpeg will seek to the closest seek point before (given)
> +            // position."
> +            //
> +            // To seek exactly (accurately), without possibly losing keyframes
> +            // or introducing desync, and still being safe against timestamps
> +            // monotonity problem, you must reencode part of video after
> +            // seeking point, to make key frame where you want to start
> +            // playback after seeking. You may also want to fill possible time
> +            // gaps with silence (for audio) or duplicating frames (for video)
> +            // to support technically poor playback clients (e.g. Flash
> +            // plugin), and this is also achievable with reencoding.  This is
> +            // simpler if you are already in process of transcoding, not in
> +            // remuxing.
> +            //
> +            // Note. In case of necessity to fill audio gaps (e.g. Flash
> +            // player) and avoid even smallest desync, and if audio output
> +            // encoding does not allow variable frame length, in certain
> +            // situation you may have to go in reencoding mode until the end of
> +            // stream, because you may have timestamp shift not equal to
> +            // multiple of audio frame duration.
> +            //
> +            // Note 2. Audio packets dts and pts do not always accurately

> +            // represent reality. Ultimately accurate accounting of audio data
> +            // duration and time offset can be achieved through accounting
> +            // number of audio samples transmitted.

unless you have audio gaps, in which case you must decide whether to
trust the PTS or not.

> +            //
> +            // The most important and practical part:
> +            //
> +            // In this example, for simplicity, we allow possibility of losing
> +            // keyframe (which can in some cases lead to scattered image for
> +            // some period after seeking). Desync is not introduced, because we
> +            // shift all elementary streams timestamps by same offset, although
> +            // see Note 2.
> +            //
> +            // Another technically similar approach is just to push packets
> +            // carelessly into muxer after seeking (with any rough shift
> +            // calculation), ignoring AVERROR(EINVAL) return values from it.
> +            // Well, you'd better ignore such errors anyway, because you can
> +            // have non-monotonic DTS already in input stream, this indeed
> +            // happens on some files. Although you may track timestamps
> +            // yourself to filter out unordered packets or maybe even reorder
> +            // them.
> +            //
> +            // This chosen approach is generally bad, because failing to
> +            // transmit correctly a video keyframe breaks the playback of up to
> +            // several seconds of video. But it is simple and does not require
> +            // anything except basic remuxing.
> +        }
> +#endif
> +
> +        // We rescale timestamps because time units used in input and output
> +        // file formats may differ
> +        // I.e. for MPEG TS, time unit is 1/90000, for FLV it is 1/1000, etc.
> +        pkt.pts = av_rescale_q(pkt.pts, in_stream->time_base, out_stream->time_base)
> +            + av_rescale_q(shift, AV_TIME_BASE_Q, out_stream->time_base);
> +        pkt.dts = av_rescale_q(pkt.dts, in_stream->time_base, out_stream->time_base)
> +            + av_rescale_q(shift, AV_TIME_BASE_Q, out_stream->time_base);
> +
> +        pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
> +        pkt.pos = -1;
> +        log_packet(ofmt_ctx, &pkt, "out");
> +
> +        ret = av_interleaved_write_frame(ofmt_ctx, &pkt);
> +        if (ret < 0) {
> +            if (ret == AVERROR(EINVAL)) {
> +                printf("Muxing error, presumably of non-monotonic DTS, can be ignored\n");
> +            } else {
> +                fprintf(stderr, "Error muxing packet\n");
> +                break;
> +            }
> +        }
> +        av_free_packet(&pkt);
> +    }
> +
> +    // Deinitialize format driver, finalizes output file/stream appropriately.
> +    av_write_trailer(ofmt_ctx);
> +
> +end:
> +    // Closes input format context and releases related memory
> +    avformat_close_input(&ifmt_ctx);
> +
> +    // Close output file/connection context
> +    if (ofmt_ctx)
> +        avio_close(ofmt_ctx->pb);
> +
> +    // Close format context of output file
> +    avformat_free_context(ofmt_ctx);
> +
> +    // Check if we got here because of error, if so - decode its meaning and report
> +    if (ret < 0 && ret != AVERROR_EOF) {
> +        fprintf(stderr, "Error occurred: %s\n", av_err2str(ret));
> +        return 1;
> +    }
> +    return 0;

Again, I don't see much difference with remuxing.c, and I'd prefer to
integrate seeking into it rather than having to maintain and keep two
distinct files in sync. The offset can be made optional: when it is not
specified, seeking is simply skipped. Also, I think a more useful
use case is letting the user specify the start and end of the stream to
remux, rather than the part to skip.
-- 
FFmpeg = Frightening & Forgiving Mind-dumbing Plastic Erroneous Gadget


More information about the ffmpeg-devel mailing list