aboutsummaryrefslogtreecommitdiffstats
path: root/meta-multimedia/recipes-multimedia/fluidsynth/files/0001-Use-ARM-NEON-accelaration-for-float-multithreaded-se.patch
blob: 0e1846e31c6f7e5f85f531e97752e73e9b8912ed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
From 2de7e128fbdf528716b500cf27ed9a4358c931c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
Date: Fri, 24 Nov 2017 00:05:35 +0100
Subject: [PATCH 2/2] Use ARM-NEON acceleration for float-multithreaded setups
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

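When built for ARM NEON with WITH_FLOAT defined, fluid_mixer_buffers_mix()
adds the source buffers to the destination buffers with NEON intrinsics,
four floats per iteration, instead of one sample at a time. The vector
loops have no scalar tail, so they rely on the sample count being a
multiple of 4. All other configurations keep the existing scalar loops.

Upstream-Status: Pending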

Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
---
 src/rvoice/fluid_rvoice_mixer.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/src/rvoice/fluid_rvoice_mixer.c b/src/rvoice/fluid_rvoice_mixer.c
index 9616518..dbf8057 100644
--- a/src/rvoice/fluid_rvoice_mixer.c
+++ b/src/rvoice/fluid_rvoice_mixer.c
@@ -27,6 +27,10 @@
 #include "fluid_ladspa.h"
 #include "fluid_synth.h"
 
+#if defined(__ARM_NEON__)
+#include "arm_neon.h"
+#endif
+
 
 #define ENABLE_MIXER_THREADS 1
 
@@ -794,20 +798,42 @@ fluid_mixer_buffers_mix(fluid_mixer_buffers_t* dest, fluid_mixer_buffers_t* src)
   if (minbuf > src->buf_count)
     minbuf = src->buf_count;
   for (i=0; i < minbuf; i++) {
+#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
+    for (j=0; j < scount; j+=4) {
+        float32x4_t vleft = vld1q_f32(&dest->left_buf[i][j]);
+        float32x4_t vright = vld1q_f32(&dest->right_buf[i][j]);
+        vleft = vaddq_f32(vleft, vld1q_f32(&src->left_buf[i][j]));
+        vright = vaddq_f32(vright, vld1q_f32(&src->right_buf[i][j]));
+        vst1q_f32(&dest->left_buf[i][j], vleft);
+        vst1q_f32(&dest->right_buf[i][j], vright);
+    }
+#else
     for (j=0; j < scount; j++) {
       dest->left_buf[i][j] += src->left_buf[i][j];
       dest->right_buf[i][j] += src->right_buf[i][j];
     }
+#endif
   }
 
   minbuf = dest->fx_buf_count;
   if (minbuf > src->fx_buf_count)
     minbuf = src->fx_buf_count;
   for (i=0; i < minbuf; i++) {
+#if defined(__ARM_NEON__) && defined(WITH_FLOAT)
+    for (j=0; j < scount; j+=4) {
+        float32x4_t vleft = vld1q_f32(&dest->fx_left_buf[i][j]);
+        float32x4_t vright = vld1q_f32(&dest->fx_right_buf[i][j]);
+        vleft = vaddq_f32(vleft, vld1q_f32(&src->fx_left_buf[i][j]));
+        vright = vaddq_f32(vright, vld1q_f32(&src->fx_right_buf[i][j]));
+        vst1q_f32(&dest->fx_left_buf[i][j], vleft);
+        vst1q_f32(&dest->fx_right_buf[i][j], vright);
+    }
+#else
     for (j=0; j < scount; j++) {
       dest->fx_left_buf[i][j] += src->fx_left_buf[i][j];
       dest->fx_right_buf[i][j] += src->fx_right_buf[i][j];
     }
+#endif
   }
 }
 
-- 
2.9.5