fluidsynth: performance improvements

* Use floats instead of doubles for sound calculations. This improves performance notably and was the default for version 1.1.6 using autotools. * Fix buffer overrun when using floats * Make use of ARM NEON when multithreading is enabled Performance and sound correctness were tested with qtractor and a reworked version of fluidsynth-dssi [1-2]. Tests were performed with both single-threading and multithreading enabled. [1] https://github.com/schnitzeltony/fluidsynth-dssi/commit/bad09c6f5c5508c5f5330aa5188510f975e50c50 [2] https://github.com/schnitzeltony/meta-qt5-extra/blob/master/recipes-misc/recipes-multimedia/fluidsynth/fluidsynth-dssi_1.0.0.bb Signed-off-by: Andreas Müller <schnitzeltony@gmail.com> Signed-off-by: Armin Kuster <akuster808@gmail.com>
author: Andreas Müller <schnitzeltony@gmail.com> 2017-12-01 12:08:10 +0100
committer: Armin Kuster <akuster808@gmail.com> 2017-12-11 07:58:56 -0800
commit: da778f933ccf34d01dfe8ca273c2191acfb36ef3 (patch)
tree: 80ee785896568c2fd6d8b64012090a9ad21446aa /meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch
parent: c0b74f42e00c84ba8419bc28de564cfa5c8d7987 (diff)
download: meta-openembedded-da778f933ccf34d01dfe8ca273c2191acfb36ef3.tar.gz
1 files changed, 76 insertions, 0 deletions
diff --git a/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch b/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch
new file mode 100644
index 0000000000..0e1846e31c
--- /dev/null
+++ b/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch
@@ -0,0 +1,76 @@
+From 2de7e128fbdf528716b500cf27ed9a4358c931c9 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
+Date: Fri, 24 Nov 2017 00:05:35 +0100
+Subject: [PATCH 2/2] Use ARM-NEON accelaration for float-multithreaded setups
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+Upstream-Status: Pending
+Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
+---
+ src/rvoice/fluid_rvoice_mixer.c | 26 ++++++++++++++++++++++++++
+ 1 file changed, 26 insertions(+)
+diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c
+index 9616518..dbf8057 100644
+--- a/src/rvoice/fluid_rvoice_mixer.c
+++ b/src/rvoice/fluid_rvoice_mixer.c
+@@ -27,6 +27,10 @@
+ #include "fluid_ladspa.h"
+ #include "fluid_synth.h"
+ 
+#if defined(__ARM_NEON__)
+#include "arm_neon.h"
+#endif
+
+ 
+ #define ENABLE_MIXER_THREADS 1
+ 
+@@ -794,20 +798,42 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dest, fluid_mixer_buffers_t* src)
+   if (minbuf > src->buf_count)
+     minbuf = src->buf_count;
+   for (i=0; i < minbuf; i++) {
+#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
+    for (j=0; j < scount; j+=4) {
+        float32x4_t vleft = vld1q_f32(&dest->left_buf[i][j]);
+        float32x4_t vright = vld1q_f32(&dest->right_buf[i][j]);
+        vleft = vaddq_f32(vleft, vld1q_f32(&src->left_buf[i][j]));
+        vright = vaddq_f32(vright, vld1q_f32(&src->right_buf[i][j]));
+        vst1q_f32(&dest->left_buf[i][j], vleft);
+        vst1q_f32(&dest->right_buf[i][j], vright);
+    }
+#else
+     for (j=0; j < scount; j++) {
+       dest->left_buf[i][j] += src->left_buf[i][j];
+       dest->right_buf[i][j] += src->right_buf[i][j];
+     }
+#endif
+   }
+ 
+   minbuf = dest->fx_buf_count;
+   if (minbuf > src->fx_buf_count)
+     minbuf = src->fx_buf_count;
+   for (i=0; i < minbuf; i++) {
+#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
+    for (j=0; j < scount; j+=4) {
+        float32x4_t vleft = vld1q_f32(&dest->fx_left_buf[i][j]);
+        float32x4_t vright = vld1q_f32(&dest->fx_right_buf[i][j]);
+        vleft = vaddq_f32(vleft, vld1q_f32(&src->fx_left_buf[i][j]));
+        vright = vaddq_f32(vright, vld1q_f32(&src->fx_right_buf[i][j]));
+        vst1q_f32(&dest->fx_left_buf[i][j], vleft);
+        vst1q_f32(&dest->fx_right_buf[i][j], vright);
+    }
+#else
+     for (j=0; j < scount; j++) {
+       dest->fx_left_buf[i][j] += src->fx_left_buf[i][j];
+       dest->fx_right_buf[i][j] += src->fx_right_buf[i][j];
+     }
+#endif
+   }
+ }
+ 
+-- 
+2.9.5
author	Andreas Müller <schnitzeltony@gmail.com>	2017-12-01 12:08:10 +0100
committer	Armin Kuster <akuster808@gmail.com>	2017-12-11 07:58:56 -0800
commit	da778f933ccf34d01dfe8ca273c2191acfb36ef3 (patch)
tree	80ee785896568c2fd6d8b64012090a9ad21446aa /meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch
parent	c0b74f42e00c84ba8419bc28de564cfa5c8d7987 (diff)
download	meta-openembedded-da778f933ccf34d01dfe8ca273c2191acfb36ef3.tar.gz

diff --git a/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch b/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch new file mode 100644 index 0000000000..0e1846e31c --- /dev/null +++ b/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch
@@ -0,0 +1,76 @@
	1	From 2de7e128fbdf528716b500cf27ed9a4358c931c9 Mon Sep 17 00:00:00 2001
	2	From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
	3	Date: Fri, 24 Nov 2017 00:05:35 +0100
	4	Subject: [PATCH 2/2] Use ARM-NEON accelaration for float-multithreaded setups
	5	MIME-Version: 1.0
	6	Content-Type: text/plain; charset=UTF-8
	7	Content-Transfer-Encoding: 8bit
	8
	9	Upstream-Status: Pending
	10
	11	Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
	12	---
	13	src/rvoice/fluid_rvoice_mixer.c \| 26 ++++++++++++++++++++++++++
	14	1 file changed, 26 insertions(+)
	15
	16	diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c
	17	index 9616518..dbf8057 100644
	18	--- a/src/rvoice/fluid_rvoice_mixer.c
	19	+++ b/src/rvoice/fluid_rvoice_mixer.c
	20	@@ -27,6 +27,10 @@
	21	#include "fluid_ladspa.h"
	22	#include "fluid_synth.h"
	23
	24	+#if defined(__ARM_NEON__)
	25	+#include "arm_neon.h"
	26	+#endif
	27	+
	28
	29	#define ENABLE_MIXER_THREADS 1
	30
	31	@@ -794,20 +798,42 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dest, fluid_mixer_buffers_t* src)
	32	if (minbuf > src->buf_count)
	33	minbuf = src->buf_count;
	34	for (i=0; i < minbuf; i++) {
	35	+#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
	36	+ for (j=0; j < scount; j+=4) {
	37	+ float32x4_t vleft = vld1q_f32(&dest->left_buf[i][j]);
	38	+ float32x4_t vright = vld1q_f32(&dest->right_buf[i][j]);
	39	+ vleft = vaddq_f32(vleft, vld1q_f32(&src->left_buf[i][j]));
	40	+ vright = vaddq_f32(vright, vld1q_f32(&src->right_buf[i][j]));
	41	+ vst1q_f32(&dest->left_buf[i][j], vleft);
	42	+ vst1q_f32(&dest->right_buf[i][j], vright);
	43	+ }
	44	+#else
	45	for (j=0; j < scount; j++) {
	46	dest->left_buf[i][j] += src->left_buf[i][j];
	47	dest->right_buf[i][j] += src->right_buf[i][j];
	48	}
	49	+#endif
	50	}
	51
	52	minbuf = dest->fx_buf_count;
	53	if (minbuf > src->fx_buf_count)
	54	minbuf = src->fx_buf_count;
	55	for (i=0; i < minbuf; i++) {
	56	+#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
	57	+ for (j=0; j < scount; j+=4) {
	58	+ float32x4_t vleft = vld1q_f32(&dest->fx_left_buf[i][j]);
	59	+ float32x4_t vright = vld1q_f32(&dest->fx_right_buf[i][j]);
	60	+ vleft = vaddq_f32(vleft, vld1q_f32(&src->fx_left_buf[i][j]));
	61	+ vright = vaddq_f32(vright, vld1q_f32(&src->fx_right_buf[i][j]));
	62	+ vst1q_f32(&dest->fx_left_buf[i][j], vleft);
	63	+ vst1q_f32(&dest->fx_right_buf[i][j], vright);
	64	+ }
	65	+#else
	66	for (j=0; j < scount; j++) {
	67	dest->fx_left_buf[i][j] += src->fx_left_buf[i][j];
	68	dest->fx_right_buf[i][j] += src->fx_right_buf[i][j];
	69	}
	70	+#endif
	71	}
	72	}
	73
	74	--
	75	2.9.5
	76