Fix decoding for streams containing differently sampled frames

This commit is contained in:
Alexander Kojevnikov 2013-02-28 21:49:59 -08:00
parent c4c134684e
commit 79ae75a7b0
3 changed files with 71 additions and 88 deletions

View File

@ -31,7 +31,7 @@ public:
AudioFileImpl(
AudioError error, AVFormatContext *format_context, int audio_stream,
const std::string& codec_name, int bit_rate, int sample_rate, int bits_per_sample,
int channels, double duration, bool is_planar, int width, bool fp
int channels, double duration
);
~AudioFileImpl() override;
void start(int samples) override;
@ -44,9 +44,7 @@ public:
int get_bits_per_sample() const override { return this->bits_per_sample; }
int get_channels() const override { return this->channels; }
double get_duration() const override { return this->duration; }
int get_width() const override { return this->width; }
bool get_fp() const override { return this->fp; }
const uint8_t *get_buffer() const override { return this->buffer; }
const float *get_buffer() const override { return this->buffer; }
int64_t get_frames_per_interval() const override { return this->frames_per_interval; }
int64_t get_error_per_interval() const override { return this->error_per_interval; }
int64_t get_error_base() const override { return this->error_base; }
@ -61,15 +59,12 @@ private:
int bits_per_sample;
int channels;
double duration;
bool is_planar;
int width;
bool fp;
AVPacket packet;
int offset;
AVFrame *frame;
int buffer_size;
uint8_t *buffer;
int buffer_len;
float *buffer;
// TODO: the fields below don't belong in this class; move them elsewhere when revamping the pipeline.
int64_t frames_per_interval;
int64_t error_per_interval;
@ -163,20 +158,12 @@ std::unique_ptr<AudioFile> Audio::open(const std::string& file_name)
error = AudioError::CANNOT_OPEN_DECODER;
}
bool is_planar = false;
int width = 0;
bool fp = false;
if (!error) {
is_planar = av_sample_fmt_is_planar(codec_context->sample_fmt);
width = av_get_bytes_per_sample(codec_context->sample_fmt);
AVSampleFormat fmt = codec_context->sample_fmt;
if (fmt == AV_SAMPLE_FMT_S16 || fmt == AV_SAMPLE_FMT_S16P ||
fmt == AV_SAMPLE_FMT_S32 || fmt == AV_SAMPLE_FMT_S32P) {
fp = false;
} else if (fmt == AV_SAMPLE_FMT_FLT || fmt == AV_SAMPLE_FMT_FLTP ||
fmt == AV_SAMPLE_FMT_DBL || fmt == AV_SAMPLE_FMT_DBLP ) {
fp = true;
} else {
if (fmt != AV_SAMPLE_FMT_S16 && fmt != AV_SAMPLE_FMT_S16P &&
fmt != AV_SAMPLE_FMT_S32 && fmt != AV_SAMPLE_FMT_S32P &&
fmt != AV_SAMPLE_FMT_FLT && fmt != AV_SAMPLE_FMT_FLTP &&
fmt != AV_SAMPLE_FMT_DBL && fmt != AV_SAMPLE_FMT_DBLP ) {
error = AudioError::BAD_SAMPLE_FORMAT;
}
}
@ -184,26 +171,26 @@ std::unique_ptr<AudioFile> Audio::open(const std::string& file_name)
return std::unique_ptr<AudioFile>(new AudioFileImpl(
error, format_context, audio_stream,
codec_name, bit_rate, sample_rate, bits_per_sample,
channels, duration, is_planar, width, fp
channels, duration
));
}
AudioFileImpl::AudioFileImpl(
AudioError error, AVFormatContext *format_context, int audio_stream,
const std::string& codec_name, int bit_rate, int sample_rate, int bits_per_sample,
int channels, double duration, bool is_planar, int width, bool fp
int channels, double duration
) :
error(error), format_context(format_context), audio_stream(audio_stream),
codec_name(codec_name), bit_rate(bit_rate),
sample_rate(sample_rate), bits_per_sample(bits_per_sample),
channels(channels), duration(duration), is_planar(is_planar), width(width), fp(fp)
channels(channels), duration(duration)
{
av_init_packet(&this->packet);
this->packet.data = nullptr;
this->packet.size = 0;
this->offset = 0;
this->frame = avcodec_alloc_frame();
this->buffer_size = 0;
this->buffer_len = 0;
this->buffer = nullptr;
this->frames_per_interval = 0;
this->error_per_interval = 0;
@ -276,27 +263,56 @@ int AudioFileImpl::read()
// We have data, return it and come back for more later.
int samples = this->frame->nb_samples;
int channels = this->channels;
int width = this->width;
int buffer_size = samples * channels * width;
if (buffer_size > this->buffer_size) {
this->buffer = (uint8_t*)av_realloc(this->buffer, buffer_size);
this->buffer_size = buffer_size;
int buffer_len = samples * channels;
if (buffer_len > this->buffer_len) {
this->buffer = static_cast<float*>(
av_realloc(this->buffer, buffer_len * sizeof(float))
);
this->buffer_len = buffer_len;
}
if (this->is_planar) {
AVSampleFormat format = static_cast<AVSampleFormat>(this->frame->format);
int is_planar = av_sample_fmt_is_planar(format);
int i = 0;
for (int sample = 0; sample < samples; ++sample) {
for (int channel = 0; channel < channels; ++channel) {
uint8_t *buffer = this->buffer + channel * width;
uint8_t *data = this->frame->data[channel];
for (int sample = 0; sample < samples; ++sample) {
for (int i = 0; i < width; ++i) {
*buffer++ = *data++;
}
buffer += (channels - 1) * width;
uint8_t *data;
int offset;
if (is_planar) {
data = this->frame->data[channel];
offset = sample;
} else {
data = this->frame->data[0];
offset = i;
}
float value;
switch (format) {
case AV_SAMPLE_FMT_S16:
case AV_SAMPLE_FMT_S16P:
value = reinterpret_cast<int16_t*>(data)[offset]
/ static_cast<float>(INT16_MAX);
break;
case AV_SAMPLE_FMT_S32:
case AV_SAMPLE_FMT_S32P:
value = reinterpret_cast<int32_t*>(data)[offset]
/ static_cast<float>(INT32_MAX);
break;
case AV_SAMPLE_FMT_FLT:
case AV_SAMPLE_FMT_FLTP:
value = reinterpret_cast<float*>(data)[offset];
break;
case AV_SAMPLE_FMT_DBL:
case AV_SAMPLE_FMT_DBLP:
value = reinterpret_cast<double*>(data)[offset];
break;
default:
value = 0.0f;
break;
}
this->buffer[i++] = value;
}
} else {
memcpy(this->buffer, this->frame->data[0], buffer_size);
}
return buffer_size;
return buffer_len;
}
if (this->packet.data) {
this->packet.data -= this->offset;

View File

@ -49,9 +49,7 @@ public:
virtual int get_bits_per_sample() const = 0;
virtual int get_channels() const = 0;
virtual double get_duration() const = 0;
virtual int get_width() const = 0;
virtual bool get_fp() const = 0;
virtual const uint8_t *get_buffer() const = 0;
virtual const float *get_buffer() const = 0;
virtual int64_t get_frames_per_interval() const = 0;
virtual int64_t get_error_per_interval() const = 0;
virtual int64_t get_error_base() const = 0;

View File

@ -83,7 +83,6 @@ struct spek_pipeline
static void * reader_func(void *);
static void * worker_func(void *);
static void reader_sync(struct spek_pipeline *p, int pos);
static float average_input(const struct spek_pipeline *p, const void *buffer);
struct spek_pipeline * spek_pipeline_open(
std::unique_ptr<AudioFile> file, int bands, int samples, spek_pipeline_cb cb, void *cb_data)
@ -295,16 +294,20 @@ static void * reader_func(void *pp)
}
int pos = 0, prev_pos = 0;
int block_size = p->file->get_width() * p->file->get_channels();
int size;
while ((size = p->file->read()) > 0) {
int channels = p->file->get_channels();
int len;
while ((len = p->file->read()) > 0) {
if (p->quit) break;
const uint8_t *buffer = p->file->get_buffer();
while (size >= block_size) {
p->input[pos] = average_input(p, buffer);
buffer += block_size;
size -= block_size;
const float *buffer = p->file->get_buffer();
while (len >= channels) {
float val = 0.0f;
for (int i = 0; i < channels; i++) {
val += buffer[i];
}
p->input[pos] = val / channels;
buffer += channels;
len -= channels;
pos = (pos + 1) % p->input_size;
// Wake up the worker if we have enough data.
@ -312,7 +315,7 @@ static void * reader_func(void *pp)
reader_sync(p, prev_pos = pos);
}
}
assert(size == 0);
assert(len == 0);
}
if (pos != prev_pos) {
@ -431,37 +434,3 @@ static void * worker_func(void *pp)
}
}
}
// Collapses one interleaved block of samples (one sample per channel, read
// from `buffer`) into a single value by averaging across channels.
//
// The sample encoding is queried from the pipeline's file: get_fp() selects
// floating-point vs. integer samples and get_width() is compared against the
// sample size in bytes (2 or 4 for integers, 4 or 8 for floating point).
// Integer samples are divided by INT16_MAX / INT32_MAX before being summed;
// floating-point samples are summed as-is, with doubles narrowed to float.
//
// Returns the accumulated sum divided by the channel count.
static float average_input(const struct spek_pipeline *p, const void *buffer)
{
    const int channels = p->file->get_channels();
    const bool floating = p->file->get_fp();
    const int width = p->file->get_width();
    float sum = 0.0f;

    if (!floating) {
        if (width == 2) {
            const int16_t *in = static_cast<const int16_t*>(buffer);
            for (int c = 0; c < channels; ++c) {
                sum += in[c] / static_cast<float>(INT16_MAX);
            }
        } else {
            // Only 16- and 32-bit integer samples are expected here.
            assert (width == 4);
            const int32_t *in = static_cast<const int32_t*>(buffer);
            for (int c = 0; c < channels; ++c) {
                sum += in[c] / static_cast<float>(INT32_MAX);
            }
        }
    } else if (width == 4) {
        const float *in = static_cast<const float*>(buffer);
        for (int c = 0; c < channels; ++c) {
            sum += in[c];
        }
    } else {
        // Anything that is not a 4-byte float is treated as an 8-byte double.
        assert(width == 8);
        const double *in = static_cast<const double*>(buffer);
        for (int c = 0; c < channels; ++c) {
            sum += static_cast<float>(in[c]);
        }
    }

    return sum / channels;
}