diff options
author | Andreas Müller <schnitzeltony@gmail.com> | 2017-12-01 12:08:10 +0100 |
---|---|---|
committer | Armin Kuster <akuster808@gmail.com> | 2017-12-11 07:58:56 -0800 |
commit | da778f933ccf34d01dfe8ca273c2191acfb36ef3 (patch) | |
tree | 80ee785896568c2fd6d8b64012090a9ad21446aa /meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch | |
parent | c0b74f42e00c84ba8419bc28de564cfa5c8d7987 (diff) | |
download | meta-openembedded-da778f933ccf34d01dfe8ca273c2191acfb36ef3.tar.gz |
fluidsynth: performance improvements
* Use floats instead of doubles for sound calculations. This improves
performance notably and was the default for version 1.1.6 using autotools.
* Fix buffer overrun when using floats
* Make use of ARM NEON when multithreading is enabled
Performance and sound correctness were tested with qtractor and a reworked
version of fluidsynth-dssi [1-2]. Tests were performed with both single- and
multithreading enabled.
[1] https://github.com/schnitzeltony/fluidsynth-dssi/commit/bad09c6f5c5508c5f5330aa5188510f975e50c50
[2] https://github.com/schnitzeltony/meta-qt5-extra/blob/master/recipes-misc/recipes-multimedia/fluidsynth/fluidsynth-dssi_1.0.0.bb
Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
Signed-off-by: Armin Kuster <akuster808@gmail.com>
Diffstat (limited to 'meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch')
-rw-r--r-- | meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch | 76 |
1 files changed, 76 insertions, 0 deletions
diff --git a/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch b/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch new file mode 100644 index 0000000000..0e1846e31c --- /dev/null +++ b/meta-multimedia/recipes-multimedia/fluidsynth/files/0002-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch | |||
@@ -0,0 +1,76 @@ | |||
1 | From 2de7e128fbdf528716b500cf27ed9a4358c931c9 Mon Sep 17 00:00:00 2001 | ||
2 | From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com> | ||
3 | Date: Fri, 24 Nov 2017 00:05:35 +0100 | ||
4 | Subject: [PATCH 2/2] Use ARM-NEON acceleration for float-multithreaded setups | |
5 | MIME-Version: 1.0 | ||
6 | Content-Type: text/plain; charset=UTF-8 | ||
7 | Content-Transfer-Encoding: 8bit | ||
8 | |||
9 | Upstream-Status: Pending | ||
10 | |||
11 | Signed-off-by: Andreas Müller <schnitzeltony@gmail.com> | ||
12 | --- | ||
13 | src/rvoice/fluid_rvoice_mixer.c | 26 ++++++++++++++++++++++++++ | ||
14 | 1 file changed, 26 insertions(+) | ||
15 | |||
16 | diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c | ||
17 | index 9616518..dbf8057 100644 | ||
18 | --- a/src/rvoice/fluid_rvoice_mixer.c | ||
19 | +++ b/src/rvoice/fluid_rvoice_mixer.c | ||
20 | @@ -27,6 +27,10 @@ | ||
21 | #include "fluid_ladspa.h" | ||
22 | #include "fluid_synth.h" | ||
23 | |||
24 | +#if defined(__ARM_NEON__) | ||
25 | +#include "arm_neon.h" | ||
26 | +#endif | ||
27 | + | ||
28 | |||
29 | #define ENABLE_MIXER_THREADS 1 | ||
30 | |||
31 | @@ -794,20 +798,42 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dest, fluid_mixer_buffers_t* src) | ||
32 | if (minbuf > src->buf_count) | ||
33 | minbuf = src->buf_count; | ||
34 | for (i=0; i < minbuf; i++) { | ||
35 | +#if defined(__ARM_NEON__) && defined(WITH_FLOAT) | ||
36 | + for (j=0; j < scount; j+=4) { | ||
37 | + float32x4_t vleft = vld1q_f32(&dest->left_buf[i][j]); | ||
38 | + float32x4_t vright = vld1q_f32(&dest->right_buf[i][j]); | ||
39 | + vleft = vaddq_f32(vleft, vld1q_f32(&src->left_buf[i][j])); | ||
40 | + vright = vaddq_f32(vright, vld1q_f32(&src->right_buf[i][j])); | ||
41 | + vst1q_f32(&dest->left_buf[i][j], vleft); | ||
42 | + vst1q_f32(&dest->right_buf[i][j], vright); | ||
43 | + } | ||
44 | +#else | ||
45 | for (j=0; j < scount; j++) { | ||
46 | dest->left_buf[i][j] += src->left_buf[i][j]; | ||
47 | dest->right_buf[i][j] += src->right_buf[i][j]; | ||
48 | } | ||
49 | +#endif | ||
50 | } | ||
51 | |||
52 | minbuf = dest->fx_buf_count; | ||
53 | if (minbuf > src->fx_buf_count) | ||
54 | minbuf = src->fx_buf_count; | ||
55 | for (i=0; i < minbuf; i++) { | ||
56 | +#if defined(__ARM_NEON__) && defined(WITH_FLOAT) | ||
57 | + for (j=0; j < scount; j+=4) { | ||
58 | + float32x4_t vleft = vld1q_f32(&dest->fx_left_buf[i][j]); | ||
59 | + float32x4_t vright = vld1q_f32(&dest->fx_right_buf[i][j]); | ||
60 | + vleft = vaddq_f32(vleft, vld1q_f32(&src->fx_left_buf[i][j])); | ||
61 | + vright = vaddq_f32(vright, vld1q_f32(&src->fx_right_buf[i][j])); | ||
62 | + vst1q_f32(&dest->fx_left_buf[i][j], vleft); | ||
63 | + vst1q_f32(&dest->fx_right_buf[i][j], vright); | ||
64 | + } | ||
65 | +#else | ||
66 | for (j=0; j < scount; j++) { | ||
67 | dest->fx_left_buf[i][j] += src->fx_left_buf[i][j]; | ||
68 | dest->fx_right_buf[i][j] += src->fx_right_buf[i][j]; | ||
69 | } | ||
70 | +#endif | ||
71 | } | ||
72 | } | ||
73 | |||
74 | -- | ||
75 | 2.9.5 | ||
76 | |||