[FFmpeg-devel] [PATCH 08/21] avformat/matroskadec: Improve error messages

Andreas Rheinhardt andreas.rheinhardt at googlemail.com
Sun Apr 7 12:24:00 EEST 2019


Steve Lhomme:
> On 3/27/2019 12:18 PM, Andreas Rheinhardt via ffmpeg-devel wrote:
>> ebml_read_num had a number of flaws:
>>
>> 1. The check for read errors/EOF was totally wrong. E.g. an EBML number
>> beginning with the invalid 0x00 would be considered a read error,
>> although it is just invalid data.
>> 2. The check for read errors/EOF was done just once, after reading the
>> first byte of the EBML number. But errors/EOF can happen inbetween, of
>> course, and this wasn't checked.
>> 3. There was no way to distinguish when EOF should be an error (because
>> the data has to be there) for which an error message should be emitted
>> and when it is not necessarily an error (namely during parsing of EBML
>> IDs). Such a possibility has been added and used.
> 
> Maybe the title of the patch should rather mention that it's fixing
> the EOF handling when reading EBML ID/Length. The changed error
> messages is a less important consequence.
> 
How about "avformat/matroskadec: Improve (in particular) EOF checks"?
> 
>>
>> Some useless initializations were also fixed.
>>
>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt at googlemail.com>
>> ---
>>   libavformat/matroskadec.c | 61
>> ++++++++++++++++++++++-----------------
>>   1 file changed, 34 insertions(+), 27 deletions(-)
>>
>> diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c
>> index a6617a607b..aa2266384a 100644
>> --- a/libavformat/matroskadec.c
>> +++ b/libavformat/matroskadec.c
>> @@ -820,29 +820,19 @@ static int ebml_level_end(MatroskaDemuxContext
>> *matroska)
>>    * Returns: number of bytes read, < 0 on error
>>    */
>>   static int ebml_read_num(MatroskaDemuxContext *matroska,
>> AVIOContext *pb,
>> -                         int max_size, uint64_t *number)
>> +                         int max_size, uint64_t *number, int
>> eof_forbidden)
>>   {
>> -    int read = 1, n = 1;
>> -    uint64_t total = 0;
>> +    int read, n = 1;
>> +    uint64_t total;
>> +    int64_t pos;
>>   -    /* The first byte tells us the length in bytes - avio_r8()
>> can normally
>> -     * return 0, but since that's not a valid first ebmlID byte, we
>> can
>> -     * use it safely here to catch EOS. */
>> -    if (!(total = avio_r8(pb))) {
>> -        /* we might encounter EOS here */
>> -        if (!avio_feof(pb)) {
>> -            int64_t pos = avio_tell(pb);
>> -            av_log(matroska->ctx, AV_LOG_ERROR,
>> -                   "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
>> -                   pos, pos);
>> -            return pb->error ? pb->error : AVERROR(EIO);
>> -        }
>> -        return AVERROR_EOF;
>> -    }
>> +    /* The first byte tells us the length in bytes - except when it
>> is zero. */
>> +    total = avio_r8(pb);
>> +    if (avio_feof(pb))
>> +        goto err;
>>         /* get the length of the EBML number */
>> -    read = 8 - ff_log2_tab[total];
>> -    if (read > max_size) {
>> +    if (!total || (read = 8 - ff_log2_tab[total]) > max_size) {
>>           int64_t pos = avio_tell(pb) - 1;
>>           av_log(matroska->ctx, AV_LOG_ERROR,
>>                  "Invalid EBML number size tag 0x%02x at pos
>> %"PRIu64" (0x%"PRIx64")\n",
>> @@ -855,9 +845,27 @@ static int ebml_read_num(MatroskaDemuxContext
>> *matroska, AVIOContext *pb,
>>       while (n++ < read)
>>           total = (total << 8) | avio_r8(pb);
>>   +    if (avio_feof(pb)) {
>> +        eof_forbidden = 1;
>> +        goto err;
>> +    }
> 
> You're forcing an error if the data ends after reading a number ?
> Ending a Matroska file with a number should be fine. It could also be
> an element with a size of 0. It doesn't contain any data but it's
> still valid (depending on the semantic of the element).
> 
> So this forced error seem wrong. Let the next read catch the EOF if it
> finds one.
> 
avio_feof doesn't indicate an error if we reach EOF, only if we
attempt to read past EOF (or if another error occurred). If the EBML
number we intend to parse is completely read, then no error is
indicated here at all. If the input ends immediately after this EBML
number, then the next read will trigger EOF and will have to catch it.

If avio_feof is set at this point, it means that the first byte of the
EBML number says that the EBML number is total bytes long, but an
error or EOF occured during reading of these bytes. I think this
warrants an error. eof_forbidden is set at this point so that an
incomplete EBML number will always trigger an error.
>> +
>>       *number = total;
>>         return read;
>> +
>> +err:
>> +    pos = avio_tell(pb);
>> +    if (pb->error) {
>> +        av_log(matroska->ctx, AV_LOG_ERROR,
>> +               "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
>> +               pos, pos);
>> +        return pb->error;
>> +    }
>> +    if (eof_forbidden)
>> +        av_log(matroska->ctx, AV_LOG_ERROR, "File ended prematurely "
>> +               "at pos. %"PRIu64" (0x%"PRIx64")\n", pos, pos);
> 
> If eof_forbidden is false (EOF allowed) you return an EOF error anyway ?
> 
Yes, so that the caller knows that the file has ended and can act
accordingly. Currently only the parsing of EBML IDs in ebml_parse
allow EOF and if we hit EOF while parsing an EBML ID, ebml_parse does
a more thorough check for whether this is a real error or something
legal (of course, based upon whether we the current level is
unknown-sized or not).
>> +    return AVERROR_EOF;
>>   }
>>     /**
>> @@ -868,7 +876,7 @@ static int ebml_read_num(MatroskaDemuxContext
>> *matroska, AVIOContext *pb,
>>   static int ebml_read_length(MatroskaDemuxContext *matroska,
>> AVIOContext *pb,
>>                               uint64_t *number)
>>   {
>> -    int res = ebml_read_num(matroska, pb, 8, number);
>> +    int res = ebml_read_num(matroska, pb, 8, number, 1);
>>       if (res > 0 && *number + 1 == 1ULL << (7 * res))
>>           *number = EBML_UNKNOWN_LENGTH;
>>       return res;
>> @@ -1010,7 +1018,7 @@ static int
>> matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
>>   {
>>       AVIOContext pb;
>>       ffio_init_context(&pb, data, size, 0, NULL, NULL, NULL, NULL);
>> -    return ebml_read_num(matroska, &pb, FFMIN(size, 8), num);
>> +    return ebml_read_num(matroska, &pb, FFMIN(size, 8), num, 1);
>>   }
>>     /*
>> @@ -1057,7 +1065,7 @@ static int ebml_parse(MatroskaDemuxContext
>> *matroska, EbmlSyntax *syntax,
>>   {
>>       if (!matroska->current_id) {
>>           uint64_t id;
>> -        int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
>> +        int res = ebml_read_num(matroska, matroska->ctx->pb, 4,
>> &id, 0);
>>           if (res < 0) {
>>               // in live mode, finish parsing if EOF is reached.
>>               return (matroska->is_live &&
>> matroska->ctx->pb->eof_reached &&
>> @@ -3335,10 +3343,9 @@ static int
>> matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf
>>       uint64_t num;
>>       int trust_default_duration = 1;
>>   -    if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) <
>> 0) {
>> -        av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data
>> error\n");
>> +    if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0)
>>           return n;
>> -    }
>> +
>>       data += n;
>>       size -= n;
>>   @@ -3699,7 +3706,7 @@ static int
>> webm_clusters_start_with_keyframe(AVFormatContext *s)
>>           AVPacket *pkt;
>>           avio_seek(s->pb, cluster_pos, SEEK_SET);
>>           // read cluster id and length
>> -        read = ebml_read_num(matroska, matroska->ctx->pb, 4,
>> &cluster_id);
>> +        read = ebml_read_num(matroska, matroska->ctx->pb, 4,
>> &cluster_id, 1);
>>           if (read < 0 || cluster_id != 0xF43B675) // done with all
>> clusters
>>               break;
>>           read = ebml_read_length(matroska, matroska->ctx->pb,
>> &cluster_length);
>> @@ -3916,7 +3923,7 @@ static int
>> webm_dash_manifest_cues(AVFormatContext *s, int64_t init_range)
>>           // Cues element ID + EBML length of the Cues element.
>> cues_end is
>>           // inclusive and the above sum is reduced by 1.
>>           uint64_t cues_length = 0, cues_id = 0, bytes_read = 0;
>> -        bytes_read += ebml_read_num(matroska, matroska->ctx->pb, 4,
>> &cues_id);
>> +        bytes_read += ebml_read_num(matroska, matroska->ctx->pb, 4,
>> &cues_id, 1);
> 
> += on a call that may return an error doesn't seem correct. The error
> should be checked before using the value.
> 
Ok, I'll fix this (pre-existing) error.
>>           bytes_read += ebml_read_length(matroska,
>> matroska->ctx->pb, &cues_length);
>>           cues_end = cues_start + cues_length + bytes_read - 1;
>>       }
>> -- 
>> 2.19.2


More information about the ffmpeg-devel mailing list