Fix decoding for streams containing frames with different sample formats

2025-07-06 03:38:47 +03:00 · 2013-02-28 21:49:59 -08:00 · 2013-02-28 21:49:59 -08:00 · 79ae75a7b0
commit 79ae75a7b0
parent c4c134684e
3 changed files with 71 additions and 88 deletions
--- a/src/spek-audio.cc
+++ b/src/spek-audio.cc
@ -31,7 +31,7 @@ public:
    AudioFileImpl(
        AudioError error, AVFormatContext *format_context, int audio_stream,
        const std::string& codec_name, int bit_rate, int sample_rate, int bits_per_sample,
-        int channels, double duration, bool is_planar, int width, bool fp
+        int channels, double duration
    );
    ~AudioFileImpl() override;
    void start(int samples) override;
@ -44,9 +44,7 @@ public:
    int get_bits_per_sample() const override { return this->bits_per_sample; }
    int get_channels() const override { return this->channels; }
    double get_duration() const override { return this->duration; }
-    int get_width() const override { return this->width; }
-    bool get_fp() const override { return this->fp; }
-    const uint8_t *get_buffer() const override { return this->buffer; }
+    const float *get_buffer() const override { return this->buffer; }
    int64_t get_frames_per_interval() const override { return this->frames_per_interval; }
    int64_t get_error_per_interval() const override { return this->error_per_interval; }
    int64_t get_error_base() const override { return this->error_base; }
@ -61,15 +59,12 @@ private:
    int bits_per_sample;
    int channels;
    double duration;
-    bool is_planar;
-    int width;
-    bool fp;

    AVPacket packet;
    int offset;
    AVFrame *frame;
-    int buffer_size;
-    uint8_t *buffer;
+    int buffer_len;
+    float *buffer;
    // TODO: these guys don't belong here, move them somewhere else when revamping the pipeline
    int64_t frames_per_interval;
    int64_t error_per_interval;
@ -163,20 +158,12 @@ std::unique_ptr<AudioFile> Audio::open(const std::string& file_name)
        error = AudioError::CANNOT_OPEN_DECODER;
    }

-    bool is_planar = false;
-    int width = 0;
-    bool fp = false;
    if (!error) {
-        is_planar = av_sample_fmt_is_planar(codec_context->sample_fmt);
-        width = av_get_bytes_per_sample(codec_context->sample_fmt);
        AVSampleFormat fmt = codec_context->sample_fmt;
-        if (fmt == AV_SAMPLE_FMT_S16 || fmt == AV_SAMPLE_FMT_S16P ||
-            fmt == AV_SAMPLE_FMT_S32 || fmt == AV_SAMPLE_FMT_S32P) {
-            fp = false;
-        } else if (fmt == AV_SAMPLE_FMT_FLT || fmt == AV_SAMPLE_FMT_FLTP ||
-            fmt == AV_SAMPLE_FMT_DBL || fmt == AV_SAMPLE_FMT_DBLP ) {
-            fp = true;
-        } else {
+        if (fmt != AV_SAMPLE_FMT_S16 && fmt != AV_SAMPLE_FMT_S16P &&
+            fmt != AV_SAMPLE_FMT_S32 && fmt != AV_SAMPLE_FMT_S32P &&
+            fmt != AV_SAMPLE_FMT_FLT && fmt != AV_SAMPLE_FMT_FLTP &&
+            fmt != AV_SAMPLE_FMT_DBL && fmt != AV_SAMPLE_FMT_DBLP ) {
            error = AudioError::BAD_SAMPLE_FORMAT;
        }
    }
@ -184,26 +171,26 @@ std::unique_ptr<AudioFile> Audio::open(const std::string& file_name)
    return std::unique_ptr<AudioFile>(new AudioFileImpl(
        error, format_context, audio_stream,
        codec_name, bit_rate, sample_rate, bits_per_sample,
-        channels, duration, is_planar, width, fp
+        channels, duration
    ));
 }

 AudioFileImpl::AudioFileImpl(
    AudioError error, AVFormatContext *format_context, int audio_stream,
    const std::string& codec_name, int bit_rate, int sample_rate, int bits_per_sample,
-    int channels, double duration, bool is_planar, int width, bool fp
+    int channels, double duration
 ) :
    error(error), format_context(format_context), audio_stream(audio_stream),
    codec_name(codec_name), bit_rate(bit_rate),
    sample_rate(sample_rate), bits_per_sample(bits_per_sample),
-    channels(channels), duration(duration), is_planar(is_planar), width(width), fp(fp)
+    channels(channels), duration(duration)
 {
    av_init_packet(&this->packet);
    this->packet.data = nullptr;
    this->packet.size = 0;
    this->offset = 0;
    this->frame = avcodec_alloc_frame();
-    this->buffer_size = 0;
+    this->buffer_len = 0;
    this->buffer = nullptr;
    this->frames_per_interval = 0;
    this->error_per_interval = 0;
@ -276,27 +263,56 @@ int AudioFileImpl::read()
            // We have data, return it and come back for more later.
            int samples = this->frame->nb_samples;
            int channels = this->channels;
-            int width = this->width;
-            int buffer_size = samples * channels * width;
-            if (buffer_size > this->buffer_size) {
-                this->buffer = (uint8_t*)av_realloc(this->buffer, buffer_size);
-                this->buffer_size = buffer_size;
+            int buffer_len = samples * channels;
+            if (buffer_len > this->buffer_len) {
+                this->buffer = static_cast<float*>(
+                    av_realloc(this->buffer, buffer_len * sizeof(float))
+                );
+                this->buffer_len = buffer_len;
            }
-            if (this->is_planar) {
+
+            AVSampleFormat format = static_cast<AVSampleFormat>(this->frame->format);
+            int is_planar = av_sample_fmt_is_planar(format);
+            int i = 0;
+            for (int sample = 0; sample < samples; ++sample) {
                for (int channel = 0; channel < channels; ++channel) {
-                    uint8_t *buffer = this->buffer + channel * width;
-                    uint8_t *data = this->frame->data[channel];
-                    for (int sample = 0; sample < samples; ++sample) {
-                        for (int i = 0; i < width; ++i) {
-                            *buffer++ = *data++;
-                        }
-                        buffer += (channels - 1) * width;
+                    uint8_t *data;
+                    int offset;
+                    if (is_planar) {
+                        data = this->frame->data[channel];
+                        offset = sample;
+                    } else {
+                        data = this->frame->data[0];
+                        offset = i;
                    }
+                    float value;
+                    switch (format) {
+                    case AV_SAMPLE_FMT_S16:
+                    case AV_SAMPLE_FMT_S16P:
+                        value = reinterpret_cast<int16_t*>(data)[offset]
+                            / static_cast<float>(INT16_MAX);
+                        break;
+                    case AV_SAMPLE_FMT_S32:
+                    case AV_SAMPLE_FMT_S32P:
+                        value = reinterpret_cast<int32_t*>(data)[offset]
+                            / static_cast<float>(INT32_MAX);
+                        break;
+                    case AV_SAMPLE_FMT_FLT:
+                    case AV_SAMPLE_FMT_FLTP:
+                        value = reinterpret_cast<float*>(data)[offset];
+                        break;
+                    case AV_SAMPLE_FMT_DBL:
+                    case AV_SAMPLE_FMT_DBLP:
+                        value = reinterpret_cast<double*>(data)[offset];
+                        break;
+                    default:
+                        value = 0.0f;
+                        break;
+                    }
+                    this->buffer[i++] = value;
                }
-            } else {
-                memcpy(this->buffer, this->frame->data[0], buffer_size);
            }
-            return buffer_size;
+            return buffer_len;
        }
        if (this->packet.data) {
            this->packet.data -= this->offset;
--- a/src/spek-audio.h
+++ b/src/spek-audio.h
@ -49,9 +49,7 @@ public:
    virtual int get_bits_per_sample() const = 0;
    virtual int get_channels() const = 0;
    virtual double get_duration() const = 0;
-    virtual int get_width() const = 0;
-    virtual bool get_fp() const = 0;
-    virtual const uint8_t *get_buffer() const = 0;
+    virtual const float *get_buffer() const = 0;
    virtual int64_t get_frames_per_interval() const = 0;
    virtual int64_t get_error_per_interval() const = 0;
    virtual int64_t get_error_base() const = 0;
--- a/src/spek-pipeline.cc
+++ b/src/spek-pipeline.cc
@ -83,7 +83,6 @@ struct spek_pipeline
 static void * reader_func(void *);
 static void * worker_func(void *);
 static void reader_sync(struct spek_pipeline *p, int pos);
-static float average_input(const struct spek_pipeline *p, const void *buffer);

 struct spek_pipeline * spek_pipeline_open(
    std::unique_ptr<AudioFile> file, int bands, int samples, spek_pipeline_cb cb, void *cb_data)
@ -295,16 +294,20 @@ static void * reader_func(void *pp)
    }

    int pos = 0, prev_pos = 0;
-    int block_size = p->file->get_width() * p->file->get_channels();
-    int size;
-    while ((size = p->file->read()) > 0) {
+    int channels = p->file->get_channels();
+    int len;
+    while ((len = p->file->read()) > 0) {
        if (p->quit) break;

-        const uint8_t *buffer = p->file->get_buffer();
-        while (size >= block_size) {
-            p->input[pos] = average_input(p, buffer);
-            buffer += block_size;
-            size -= block_size;
+        const float *buffer = p->file->get_buffer();
+        while (len >= channels) {
+            float val = 0.0f;
+            for (int i = 0; i < channels; i++) {
+                val += buffer[i];
+            }
+            p->input[pos] = val / channels;
+            buffer += channels;
+            len -= channels;
            pos = (pos + 1) % p->input_size;

            // Wake up the worker if we have enough data.
@ -312,7 +315,7 @@ static void * reader_func(void *pp)
                reader_sync(p, prev_pos = pos);
            }
        }
-        assert(size == 0);
+        assert(len == 0);
    }

    if (pos != prev_pos) {
@ -431,37 +434,3 @@ static void * worker_func(void *pp)
        }
    }
 }
-
-static float average_input(const struct spek_pipeline *p, const void *buffer)
-{
-    int channels = p->file->get_channels();
-    float res = 0.0f;
-    if (p->file->get_fp()) {
-        if (p->file->get_width() == 4) {
-            float *b = (float*)buffer;
-            for (int i = 0; i < channels; i++) {
-                res += b[i];
-            }
-        } else {
-            assert(p->file->get_width() == 8);
-            double *b = (double*)buffer;
-            for (int i = 0; i < channels; i++) {
-                res += (float) b[i];
-            }
-        }
-    } else {
-        if (p->file->get_width() == 2) {
-            int16_t *b = (int16_t*)buffer;
-            for (int i = 0; i < channels; i++) {
-                res += b[i] / (float) INT16_MAX;
-            }
-        } else {
-            assert (p->file->get_width() == 4);
-            int32_t *b = (int32_t*)buffer;
-            for (int i = 0; i < channels; i++) {
-                res += b[i] / (float) INT32_MAX;
-            }
-        }
-    }
-    return res / channels;
-}