Average values for decoded audio

2025-06-04 01:21:21 +03:00 · 2010-07-02 20:01:27 +10:00 · 2010-07-02 20:01:27 +10:00 · 9c1a32213b
commit 9c1a32213b
parent eea89183d1
5 changed files with 109 additions and 23 deletions
--- a/src/spek-audio.c
+++ b/src/spek-audio.c
@ -55,7 +55,8 @@ SpekAudioContext * spek_audio_open (const char *file_name) {
 		cx->error = _("The file contains no audio streams");
 		return cx;
 	}
-	cx->codec_context = cx->format_context->streams[cx->audio_stream]->codec;
+	cx->stream = cx->format_context->streams[cx->audio_stream];
+	cx->codec_context = cx->stream->codec;
 	cx->codec = avcodec_find_decoder (cx->codec_context->codec_id);
 	if (cx->codec == NULL) {
 		cx->error = _("Cannot find decoder");
@ -71,11 +72,36 @@ SpekAudioContext * spek_audio_open (const char *file_name) {
 		cx->bits_per_sample = cx->codec_context->bits_per_coded_sample;
 	}
 	cx->channels = cx->codec_context->channels;
+	if (cx->channels <= 0) {
+		cx->error = _("No audio channels");
+		return cx;
+	}
 	cx->buffer_size = (AVCODEC_MAX_AUDIO_FRAME_SIZE * 3) / 2;
 	if (avcodec_open (cx->codec_context, cx->codec) < 0) {
 		cx->error = _("Cannot open decoder");
 		return cx;
 	}
+	switch (cx->codec_context->sample_fmt) {
+	case SAMPLE_FMT_S16:
+		cx->width = 16;
+		cx->fp = FALSE;
+		break;
+	case SAMPLE_FMT_S32:
+		cx->width = 32;
+		cx->fp = FALSE;
+		break;
+	case SAMPLE_FMT_FLT:
+		cx->width = 32;
+		cx->fp = TRUE;
+		break;
+	case SAMPLE_FMT_DBL:
+		cx->width = 64;
+		cx->fp = TRUE;
+		break;
+	default:
+		cx->error = _("Unsupported sample format");
+		return cx;
+	}
 	av_init_packet (&cx->packet);
 	cx->offset = 0;
 	return cx;
--- a/src/spek-audio.h
+++ b/src/spek-audio.h
@ -28,6 +28,7 @@ typedef struct {
 	AVFormatContext *format_context;
 	gint audio_stream;
 	AVCodecContext *codec_context;
+	AVStream *stream;
 	AVCodec *codec;
 	AVPacket packet;
 	gint offset;
@ -39,6 +40,8 @@ typedef struct {
 	gint bit_rate;
 	gint sample_rate;
 	gint bits_per_sample;
+	gint width; /* number of bits used to store a sample */
+	gboolean fp; /* floating-point sample representation */
 	gint channels;
 	gint buffer_size; /* minimum buffer size for spek_audio_read() */
 } SpekAudioContext;
--- a/src/spek-pipeline.vala
+++ b/src/spek-pipeline.vala
@ -14,15 +14,34 @@
 *
 * You should have received a copy of the GNU General Public License
 * along with Spek.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Conversion of decoded samples into an FFT-happy format is heavily
+ * influenced by GstSpectrum which is part of gst-plugins-good.
+ * The original code:
+ * (c) 1999 Erik Walthinsen <omega@cse.ogi.edu>
+ * (c) 2006 Stefan Kost <ensonic@users.sf.net>
+ * (c) 2007-2009 Sebastian Dröge <sebastian.droege@collabora.co.uk>
 */

 namespace Spek {
 	public class Pipeline {
-		private Audio.Context cx;
 		public string description { get; private set; }
+		public int sample_rate { get; private set; }
+		public delegate void Callback (int sample, float[] values);

-		public Pipeline (string file_name) {
-			cx = new Audio.Context (file_name);
+		private Audio.Context cx;
+		private int bands;
+		private int samples;
+		private int threshold;
+		private Callback cb;
+		private uint8[] buffer;
+
+		public Pipeline (string file_name, int bands, int samples, int threshold, Callback cb) {
+			this.cx = new Audio.Context (file_name);
+			this.bands = bands;
+			this.samples = samples;
+			this.threshold = threshold;
+			this.cb = cb;

 			// Build the description string.
 			string[] items = {};
@ -51,24 +70,59 @@ namespace Spek {
 				description = _("%s: %s").printf (cx.error, description);
 			}

-			var buffer = new uint8[cx.buffer_size];
-			while (cx.read (buffer) > 0);
+			this.sample_rate = cx.sample_rate;
+			this.buffer = new uint8[cx.buffer_size];
 		}

-		public string file_name {
-			get { return cx.file_name; }
+		// Decodes the stream buffer by buffer and stores the channel average of
+		// every sample frame into a circular `input` array of `nfft` floats.
+		// Further processing of `input` is not implemented yet (see TODO below).
+		public void start () {
+			// NOTE(review): presumably the FFT length that yields `bands`
+			// frequency bins; needs bands >= 2 so that nfft > 0 — confirm callers.
+			int nfft = 2 * bands - 2;
+			var input = new float[nfft];
+			int pos = 0;
+			int frames = 0;
+			int size;
+			while ((size = cx.read (this.buffer)) > 0) {
+				uint8 *buffer = (uint8 *) this.buffer;
+				// cx.width is expressed in bits (16/32/64), whereas `size` and the
+				// uint8 pointer below advance in bytes: convert the size of one
+				// sample frame (one sample per channel) to bytes.
+				var block_size = cx.width * cx.channels / 8;
+				while (size >= block_size) {
+					input[pos] = average_input (buffer);
+					buffer += block_size;
+					size -= block_size;
+					// `input` is used as a ring buffer: wrap around after nfft values.
+					pos = (pos + 1) % nfft;
+					frames++;
+
+					// TODO
+				}
+				// Every decoded buffer is expected to hold whole sample frames only.
+				assert (size == 0);
+			}
 		}

-		public int bit_rate {
-			get { return cx.bit_rate; }
-		}
-
-		public int sample_rate {
-			get { return cx.sample_rate; }
-		}
-
-		public int channels {
-			get { return cx.channels; }
+		// Averages one sample frame across channels.
+		//
+		// Reads cx.channels consecutive samples starting at `buffer`, interpreting
+		// them according to cx.fp / cx.width (float, double, int32 or int16), and
+		// returns their arithmetic mean. Integer samples are divided by
+		// `max_value` (derived from cx.bits_per_sample) or, when that is not
+		// usable, by the maximum of the storage type. Any other fp/width
+		// combination is treated as a programming error.
+		private float average_input (uint8 *buffer) {
+			float res = 0f;
+			// Largest positive value of a signed sample with bits_per_sample bits;
+			// 0 means "unknown", in which case the storage type maximum is used.
+			// NOTE(review): if the decoder scales samples to the full storage
+			// width, a bits_per_sample smaller than cx.width would make results
+			// exceed 1.0 — confirm against the decoder's output format.
+			float max_value = cx.bits_per_sample > 1 ? (1UL << (cx.bits_per_sample - 1)) - 1 : 0;
+			if (cx.fp && cx.width == 32) {
+				float *p = (float *) buffer;
+				for (int i = 0; i < cx.channels; i++) {
+					res += p[i];
+				}
+			} else if (cx.fp && cx.width == 64) {
+				double *p = (double *) buffer;
+				for (int i = 0; i < cx.channels; i++) {
+					res += (float) p[i];
+				}
+			} else if (!cx.fp && cx.width == 32) {
+				int32 *p = (int32 *) buffer;
+				for (int i = 0; i < cx.channels; i++) {
+					res += p[i] / (max_value == 0 ? int32.MAX : max_value);
+				}
+			} else if (!cx.fp && cx.width == 16) {
+				// 16-bit samples occupy two bytes each, so they must be read
+				// through an int16 pointer (not int64) to stay inside the frame.
+				int16 *p = (int16 *) buffer;
+				for (int i = 0; i < cx.channels; i++) {
+					res += p[i] / (max_value == 0 ? int16.MAX : max_value);
+				}
+			} else {
+				assert_not_reached ();
+			}
+			return res / cx.channels;
 		}
 	}
 }
--- a/src/spek-spectrogram.vala
+++ b/src/spek-spectrogram.vala
@ -63,10 +63,6 @@ namespace Spek {
 			this.file_name = file_name;
 			this.info = "";

-			// TODO
-			var pipeline = new Pipeline (file_name);
-			print ("\n%s:\n%s\n", file_name, pipeline.description);
-
 			start ();
 		}

@ -92,6 +88,11 @@ namespace Spek {
 				image = null;
 				source = null;
 			}
+
+			// TODO
+			var pipeline = new Pipeline (file_name, BANDS, samples, THRESHOLD, data_cb);
+			print ("\n%s:\n%s\n", file_name, pipeline.description);
+
 			queue_draw ();
 		}

@ -240,7 +241,7 @@ namespace Spek {
 			unowned uchar[] data = surface.get_data ();

 			// Translate uchar* to uint32* to avoid dealing with endianness.
-			uint32 *p = &data[i];
+			uint32 *p = (uint32 *) (&data[i]);
 			*p = color;
 		}

--- a/vapi/spek-audio.vapi
+++ b/vapi/spek-audio.vapi
@ -9,6 +9,8 @@ namespace Spek.Audio {
 		public int bit_rate;
 		public int sample_rate;
 		public int bits_per_sample;
+		public int width;
+		public bool fp;
 		public int channels;
 		public int buffer_size;