[FFmpeg-devel] [PATCH] avcodec/ffv1enc: further reduce stack usage
James Almer
jamrial at gmail.com
Tue Mar 25 00:20:50 EET 2025
Continues from commit 702239bc500b, fixing FATE failures on macOS.
Signed-off-by: James Almer <jamrial at gmail.com>
---
Confirmed by Martin Storsjö. Float encoding untested.
libavcodec/ffv1.h | 16 ++++
libavcodec/ffv1enc.c | 177 +++++++++++++++++--------------------------
2 files changed, 84 insertions(+), 109 deletions(-)
diff --git a/libavcodec/ffv1.h b/libavcodec/ffv1.h
index 09118e0b7d..d1c239f138 100644
--- a/libavcodec/ffv1.h
+++ b/libavcodec/ffv1.h
@@ -115,6 +115,22 @@ typedef struct FFV1SliceContext {
uint32_t val; //this is unneeded if you accept a dereference on each access
uint16_t ndx;
} unit[4][65536];
+ struct RemapEncoderState {
+ int delta_stack[65536]; //We need to encode the run value before the adjustments, this stores the adjustments until we know the length of the run
+ int16_t index_stack[65537]; //only needed with multiple segments
+ uint8_t state[2][3][32];
+ int mul[4096+1];
+ RangeCoder rc;
+ int lu;
+ int run;
+ int64_t last_val;
+ int compact_index;
+ int mul_count;
+ int i;
+ int pixel_num;
+ int p;
+ int current_mul_index;
+ } remap_state;
} FFV1SliceContext;
typedef struct FFV1Context {
diff --git a/libavcodec/ffv1enc.c b/libavcodec/ffv1enc.c
index 4340a301e1..8dcca7e1d6 100644
--- a/libavcodec/ffv1enc.c
+++ b/libavcodec/ffv1enc.c
@@ -1246,42 +1246,7 @@ static void load_rgb_float32_frame(FFV1Context *f, FFV1SliceContext *sc,
AV_QSORT(sc->unit[3], i, struct Unit, CMP);
}
-typedef struct RemapEncoderState {
- int delta_stack[65536]; //We need to encode the run value before the adjustments, this stores the adjustments until we know the length of the run
- int16_t index_stack[65537]; //only needed with multiple segments
- uint8_t state[2][3][32];
- int mul[4096+1];
- RangeCoder rc;
- int lu;
- int run;
- int64_t last_val;
- int compact_index;
- int mul_count;
- int i;
- int pixel_num;
- int p;
- int current_mul_index;
-} RemapEncoderState;
-
-static inline void copy_state(RemapEncoderState *dst, const RemapEncoderState *src)
-{
- dst->rc = src->rc;
- memcpy(dst->mul, src->mul, (src->mul_count + 1) * sizeof(src->mul[0]));
- memcpy(dst->delta_stack, src->delta_stack, src->run * sizeof(src->delta_stack[0]));
- memcpy(dst->index_stack, src->index_stack, (src->run + 1) * sizeof(src->index_stack[0]));
- memcpy(dst->state, src->state, sizeof(dst->state));
- dst->lu = src->lu;
- dst->run = src->run;
- dst->last_val = src->last_val;
- dst->compact_index = src->compact_index;
- dst->mul_count = src->mul_count;
- dst->i = src->i;
- dst->pixel_num = src->pixel_num;
- dst->p = src->p;
- dst->current_mul_index = src->current_mul_index;
-}
-
-static inline void encode_mul(RemapEncoderState *s, int mul_index)
+static inline void encode_mul(struct RemapEncoderState *s, int mul_index)
{
av_assert2(s->mul[ mul_index ]);
if (s->mul[ mul_index ] < 0) {
@@ -1290,122 +1255,116 @@ static inline void encode_mul(RemapEncoderState *s, int mul_index)
}
}
-static int encode_float32_remap_segment(FFV1SliceContext *sc,
- RemapEncoderState *state_arg, int update, int final)
+static int encode_float32_remap_segment(FFV1SliceContext *sc, int final)
{
- RemapEncoderState s;
-
- copy_state(&s, state_arg);
-
- if (s.i == 0) {
- memset(s.state, 128, sizeof(s.state));
- put_symbol(&s.rc, s.state[0][0], s.mul_count, 0);
- memset(s.state, 128, sizeof(s.state));
- s.last_val = -1;
- s.compact_index = -1;
- s.lu = 0;
- s.run = 0;
- s.current_mul_index = -1;
+ struct RemapEncoderState *s = &sc->remap_state;
+
+ if (s->i == 0) {
+ memset(s->state, 128, sizeof(s->state));
+ put_symbol(&s->rc, s->state[0][0], s->mul_count, 0);
+ memset(s->state, 128, sizeof(s->state));
+ s->last_val = -1;
+ s->compact_index = -1;
+ s->lu = 0;
+ s->run = 0;
+ s->current_mul_index = -1;
}
- for (; s.i < s.pixel_num+1; s.i++) {
- int current_mul = s.current_mul_index < 0 ? 1 : FFABS(s.mul[s.current_mul_index]);
+ for (; s->i < s->pixel_num+1; s->i++) {
+ int current_mul = s->current_mul_index < 0 ? 1 : FFABS(s->mul[s->current_mul_index]);
int64_t val;
- if (s.i == s.pixel_num) {
- if (s.last_val == 0xFFFFFFFF) {
+ if (s->i == s->pixel_num) {
+ if (s->last_val == 0xFFFFFFFF) {
break;
} else {
val = 1LL<<32;
}
} else
- val = sc->unit[s.p][s.i].val;
+ val = sc->unit[s->p][s->i].val;
- if (s.last_val != val) {
+ if (s->last_val != val) {
int64_t delta = 0;
- av_assert2(s.last_val < val);
+ av_assert2(s->last_val < val);
av_assert2(current_mul > 0);
if (current_mul > 1) {
- delta = val - s.last_val;
+ delta = val - s->last_val;
val = FFMAX(1, (delta + current_mul/2) / current_mul);
delta -= val*current_mul;
av_assert2(delta <= current_mul/2);
av_assert2(delta > -current_mul);
- val += s.last_val;
+ val += s->last_val;
}
- av_assert2(s.last_val < val);
- if (s.lu) {
- s.index_stack[s.run] = s.current_mul_index;
- av_assert2(s.run < FF_ARRAY_ELEMS(s.delta_stack));
- if (val - s.last_val == 1) {
- s.delta_stack[s.run] = delta;
- s.run ++;
- av_assert2(s.i == s.pixel_num || s.last_val + current_mul + delta == sc->unit[s.p][s.i].val);
- s.last_val += current_mul + delta;
+ av_assert2(s->last_val < val);
+ if (s->lu) {
+ s->index_stack[s->run] = s->current_mul_index;
+ av_assert2(s->run < FF_ARRAY_ELEMS(s->delta_stack));
+ if (val - s->last_val == 1) {
+ s->delta_stack[s->run] = delta;
+ s->run ++;
+ av_assert2(s->i == s->pixel_num || s->last_val + current_mul + delta == sc->unit[s->p][s->i].val);
+ s->last_val += current_mul + delta;
} else {
- put_symbol_inline(&s.rc, s.state[s.lu][0], s.run, 0, NULL, NULL);
+ put_symbol_inline(&s->rc, s->state[s->lu][0], s->run, 0, NULL, NULL);
- for(int k=0; k<s.run; k++) {
- int stack_mul = s.mul[ s.index_stack[k] ];
+ for(int k=0; k<s->run; k++) {
+ int stack_mul = s->mul[ s->index_stack[k] ];
if (stack_mul>1)
- put_symbol_inline(&s.rc, s.state[s.lu][1], s.delta_stack[k], 1, NULL, NULL);
- encode_mul(&s, s.index_stack[k+1]);
+ put_symbol_inline(&s->rc, s->state[s->lu][1], s->delta_stack[k], 1, NULL, NULL);
+ encode_mul(s, s->index_stack[k+1]);
}
- if (s.run == 0)
- s.lu ^= 1;
- s.run = 0;
- s.i--; // we did not encode val so we need to backstep
- s.last_val += current_mul;
+ if (s->run == 0)
+ s->lu ^= 1;
+ s->run = 0;
+ s->i--; // we did not encode val so we need to backstep
+ s->last_val += current_mul;
continue;
}
} else {
- av_assert2(s.run == 0);
- put_symbol_inline(&s.rc, s.state[s.lu][0], val - s.last_val - 1, 0, NULL, NULL);
+ av_assert2(s->run == 0);
+ put_symbol_inline(&s->rc, s->state[s->lu][0], val - s->last_val - 1, 0, NULL, NULL);
if (current_mul > 1)
- put_symbol_inline(&s.rc, s.state[s.lu][1], delta, 1, NULL, NULL);
- if (val - s.last_val == 1)
- s.lu ^= 1;
+ put_symbol_inline(&s->rc, s->state[s->lu][1], delta, 1, NULL, NULL);
+ if (val - s->last_val == 1)
+ s->lu ^= 1;
- av_assert2(s.i == s.pixel_num || s.last_val + (val - s.last_val) * current_mul + delta == sc->unit[s.p][s.i].val);
- if (s.i < s.pixel_num)
- s.last_val = sc->unit[s.p][s.i].val;
+ av_assert2(s->i == s->pixel_num || s->last_val + (val - s->last_val) * current_mul + delta == sc->unit[s->p][s->i].val);
+ if (s->i < s->pixel_num)
+ s->last_val = sc->unit[s->p][s->i].val;
}
- s.current_mul_index = ((s.last_val + 1) * s.mul_count) >> 32;
- if (!s.run)
- encode_mul(&s, s.current_mul_index);
- s.compact_index ++;
+ s->current_mul_index = ((s->last_val + 1) * s->mul_count) >> 32;
+ if (!s->run)
+ encode_mul(s, s->current_mul_index);
+ s->compact_index ++;
}
- if (final && s.i < s.pixel_num)
- sc->bitmap[s.p][sc->unit[s.p][s.i].ndx] = s.compact_index;
+ if (final && s->i < s->pixel_num)
+ sc->bitmap[s->p][sc->unit[s->p][s->i].ndx] = s->compact_index;
}
- if (update) {
- copy_state(state_arg, &s);
- }
- return get_rac_count(&s.rc);
+ return get_rac_count(&s->rc);
}
static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
const uint8_t *src[4])
{
- RemapEncoderState s;
- s.pixel_num = sc->slice_width * sc->slice_height;
+ struct RemapEncoderState *s = &sc->remap_state;
+ s->pixel_num = sc->slice_width * sc->slice_height;
- av_assert0 (s.pixel_num <= 65536);
+ av_assert0 (s->pixel_num <= 65536);
for (int p= 0; p < 1 + 2*f->chroma_planes + f->transparency; p++) {
float score_tab[16] = {0};
int64_t last_val = -1;
int best_index = 0;
- s.rc = sc->c;
- s.i = 0;
- s.p = p;
+ s->rc = sc->c;
+ s->i = 0;
+ s->p = p;
- s.mul_count = 1;
+ s->mul_count = 1;
- for (int i= 0; i<s.pixel_num; i++) {
+ for (int i= 0; i<s->pixel_num; i++) {
int64_t val = sc->unit[p][i].val;
if (val != last_val) {
av_assert2(last_val < val);
@@ -1422,12 +1381,12 @@ static void encode_float32_remap(FFV1Context *f, FFV1SliceContext *sc,
if (score_tab[si] < score_tab[ best_index ])
best_index = si;
}
- s.mul[0] = -1 << best_index;
- s.mul[s.mul_count] = 1;
+ s->mul[0] = -1 << best_index;
+ s->mul[s->mul_count] = 1;
- encode_float32_remap_segment(sc, &s, 1, 1);
+ encode_float32_remap_segment(sc, 1);
- sc->c = s.rc;
+ sc->c = s->rc;
}
}
--
2.48.1
More information about the ffmpeg-devel mailing list