Average values for decoded audio

This commit is contained in:
Alexander Kojevnikov 2010-07-02 20:01:27 +10:00
parent eea89183d1
commit 9c1a32213b
5 changed files with 109 additions and 23 deletions

View File

@ -55,7 +55,8 @@ SpekAudioContext * spek_audio_open (const char *file_name) {
cx->error = _("The file contains no audio streams");
return cx;
}
cx->codec_context = cx->format_context->streams[cx->audio_stream]->codec;
cx->stream = cx->format_context->streams[cx->audio_stream];
cx->codec_context = cx->stream->codec;
cx->codec = avcodec_find_decoder (cx->codec_context->codec_id);
if (cx->codec == NULL) {
cx->error = _("Cannot find decoder");
@ -71,11 +72,36 @@ SpekAudioContext * spek_audio_open (const char *file_name) {
cx->bits_per_sample = cx->codec_context->bits_per_coded_sample;
}
cx->channels = cx->codec_context->channels;
if (cx->channels <= 0) {
cx->error = _("No audio channels");
return cx;
}
cx->buffer_size = (AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2;
if (avcodec_open (cx->codec_context, cx->codec) < 0) {
cx->error = _("Cannot open decoder");
return cx;
}
switch (cx->codec_context->sample_fmt) {
case SAMPLE_FMT_S16:
cx->width = 16;
cx->fp = FALSE;
break;
case SAMPLE_FMT_S32:
cx->width = 32;
cx->fp = FALSE;
break;
case SAMPLE_FMT_FLT:
cx->width = 32;
cx->fp = TRUE;
break;
case SAMPLE_FMT_DBL:
cx->width = 64;
cx->fp = TRUE;
break;
default:
cx->error = _("Unsupported sample format");
return cx;
}
av_init_packet (&cx->packet);
cx->offset = 0;
return cx;

View File

@ -28,6 +28,7 @@ typedef struct {
AVFormatContext *format_context;
gint audio_stream;
AVCodecContext *codec_context;
AVStream *stream;
AVCodec *codec;
AVPacket packet;
gint offset;
@ -39,6 +40,8 @@ typedef struct {
gint bit_rate;
gint sample_rate;
gint bits_per_sample;
gint width; /* number of bits used to store a sample */
gboolean fp; /* floating-point sample representation */
gint channels;
gint buffer_size; /* minimum buffer size for spek_audio_read() */
} SpekAudioContext;

View File

@ -14,15 +14,34 @@
*
* You should have received a copy of the GNU General Public License
* along with Spek. If not, see <http://www.gnu.org/licenses/>.
*
* Conversion of decoded samples into an FFT-happy format is heavily
* influenced by GstSpectrum which is part of gst-plugins-good.
* The original code:
* (c) 1999 Erik Walthinsen <omega@cse.ogi.edu>
* (c) 2006 Stefan Kost <ensonic@users.sf.net>
* (c) 2007-2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
*/
namespace Spek {
public class Pipeline {
private Audio.Context cx;
public string description { get; private set; }
public int sample_rate { get; private set; }
public delegate void Callback (int sample, float[] values);
public Pipeline (string file_name) {
cx = new Audio.Context (file_name);
private Audio.Context cx;
private int bands;
private int samples;
private int threshold;
private Callback cb;
private uint8[] buffer;
public Pipeline (string file_name, int bands, int samples, int threshold, Callback cb) {
this.cx = new Audio.Context (file_name);
this.bands = bands;
this.samples = samples;
this.threshold = threshold;
this.cb = cb;
// Build the description string.
string[] items = {};
@ -51,24 +70,59 @@ namespace Spek {
description = _("%s: %s").printf (cx.error, description);
}
var buffer = new uint8[cx.buffer_size];
while (cx.read (buffer) > 0);
this.sample_rate = cx.sample_rate;
this.buffer = new uint8[cx.buffer_size];
}
public string file_name {
get { return cx.file_name; }
// Reads decoded audio from the context until cx.read () stops returning a
// positive size, folding every interleaved frame into a single averaged
// value that is stored in a circular window of nfft entries.
// Turning the window into spectrum values is still a TODO.
public void start () {
    // Window length: nfft real inputs correspond to `bands` frequency bins.
    int nfft = 2 * bands - 2;
    // Size in BYTES of one interleaved frame (one sample per channel).
    // cx.width is expressed in bits, while the buffer is walked through a
    // uint8 pointer, hence the division by 8.
    int block_size = cx.width * cx.channels / 8;
    if (nfft <= 0 || block_size <= 0) {
        // bands < 2 yields an empty window (and a modulo by zero below);
        // a non-positive block size would keep the inner loop from ever
        // terminating. Nothing sensible can be computed in either case.
        return;
    }

    var input = new float[nfft];
    int pos = 0;
    int frames = 0;
    int size;
    // NOTE(review): assumes cx.read () returns the number of bytes written
    // into the buffer - confirm against spek_audio_read ().
    while ((size = cx.read (this.buffer)) > 0) {
        uint8 *cursor = (uint8 *) this.buffer;
        while (size >= block_size) {
            input[pos] = average_input (cursor);
            cursor += block_size;
            size -= block_size;
            // Wrap around so that `input` always holds the latest nfft values.
            pos = (pos + 1) % nfft;
            frames++;
            // TODO
        }
        // A read is expected to contain a whole number of frames.
        assert (size == 0);
    }
}
public int bit_rate {
get { return cx.bit_rate; }
}
public int sample_rate {
get { return cx.sample_rate; }
}
public int channels {
get { return cx.channels; }
// Averages one interleaved frame (cx.channels consecutive samples starting
// at `buffer`) into a single float.
// Floating-point samples are summed as they are; integer samples are first
// divided by the largest value representable with cx.bits_per_sample bits,
// falling back to the maximum of the storage type when that is unknown.
// The sample layout is selected by cx.fp / cx.width; any other combination
// is a programming error and aborts.
private float average_input (uint8 *buffer) {
    float res = 0f;
    // 2^(bits - 1) - 1, or 0 when bits_per_sample carries no usable value.
    float max_value = cx.bits_per_sample > 1 ? (1UL << (cx.bits_per_sample - 1)) - 1 : 0;

    if (cx.fp && cx.width == 32) {
        float *p = (float *) buffer;
        for (int i = 0; i < cx.channels; i++) {
            res += p[i];
        }
    } else if (cx.fp && cx.width == 64) {
        double *p = (double *) buffer;
        for (int i = 0; i < cx.channels; i++) {
            res += (float) p[i];
        }
    } else if (!cx.fp && cx.width == 32) {
        int32 *p = (int32 *) buffer;
        float divisor = max_value == 0 ? (float) int32.MAX : max_value;
        for (int i = 0; i < cx.channels; i++) {
            res += p[i] / divisor;
        }
    } else if (!cx.fp && cx.width == 16) {
        // 16-bit samples must be read through a 16-bit pointer: a wider
        // type would use the wrong stride and read past the frame.
        int16 *p = (int16 *) buffer;
        float divisor = max_value == 0 ? (float) int16.MAX : max_value;
        for (int i = 0; i < cx.channels; i++) {
            res += p[i] / divisor;
        }
    } else {
        assert_not_reached ();
    }

    return res / cx.channels;
}
}
}

View File

@ -63,10 +63,6 @@ namespace Spek {
this.file_name = file_name;
this.info = "";
// TODO
var pipeline = new Pipeline (file_name);
print ("\n%s:\n%s\n", file_name, pipeline.description);
start ();
}
@ -92,6 +88,11 @@ namespace Spek {
image = null;
source = null;
}
// TODO
var pipeline = new Pipeline (file_name, BANDS, samples, THRESHOLD, data_cb);
print ("\n%s:\n%s\n", file_name, pipeline.description);
queue_draw ();
}
@ -240,7 +241,7 @@ namespace Spek {
unowned uchar[] data = surface.get_data ();
// Translate uchar* to uint32* to avoid dealing with endianness.
uint32 *p = &data[i];
uint32 *p = (uint32 *) (&data[i]);
*p = color;
}

View File

@ -9,6 +9,8 @@ namespace Spek.Audio {
public int bit_rate;
public int sample_rate;
public int bits_per_sample;
public int width;
public bool fp;
public int channels;
public int buffer_size;