diff options
author | Andreas Müller <schnitzeltony@googlemail.com> | 2017-01-25 21:06:02 +0100 |
---|---|---|
committer | Martin Jansa <Martin.Jansa@gmail.com> | 2017-02-13 18:43:23 +0100 |
commit | d307c4f59deb22cc8dfecb88720b5162f39d895c (patch) | |
tree | 41bcc9e75f11dbf6133fd65d2c091fd76a51eaee /meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch | |
parent | 0db057da47354a7a1184ebf1d0c3c6d0ecdc89aa (diff) | |
download | meta-openembedded-contrib-d307c4f59deb22cc8dfecb88720b5162f39d895c.tar.gz |
jack: add ARM NEON support for sample conversions
Add a test application checking the accuracy and performance gain of the
accelerated code.
Signed-off-by: Andreas Müller <schnitzeltony@googlemail.com>
Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
Diffstat (limited to 'meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch')
-rw-r--r-- | meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch | 433 |
1 files changed, 433 insertions, 0 deletions
diff --git a/meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch b/meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch new file mode 100644 index 0000000000..e0c9e8ca87 --- /dev/null +++ b/meta-oe/recipes-multimedia/jack/jack/0002-jack_simdtests-add-application-checking-accurracy-an.patch @@ -0,0 +1,433 @@ +From d0543c0628d2c0a6d898c694003e941fa189b393 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@googlemail.com> +Date: Sun, 15 Jan 2017 20:52:20 +0100 +Subject: [PATCH 2/2] jack_simdtests: add application checking accurracy and + performance of SIMD optimizations +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Upstream-Status: Submitted [1] + +[1] https://github.com/jackaudio/jack2/pull/250 + +Signed-off-by: Andreas Müller <schnitzeltony@googlemail.com> +--- + example-clients/simdtests.cpp | 390 ++++++++++++++++++++++++++++++++++++++++++ + example-clients/wscript | 3 +- + 2 files changed, 392 insertions(+), 1 deletion(-) + create mode 100644 example-clients/simdtests.cpp + +diff --git a/example-clients/simdtests.cpp b/example-clients/simdtests.cpp +new file mode 100644 +index 0000000..b74d50a +--- /dev/null ++++ b/example-clients/simdtests.cpp +@@ -0,0 +1,390 @@ ++/* ++ * simdtests.c -- test accuraccy and performance of simd optimizations ++ * ++ * Copyright (C) 2017 Andreas Mueller. ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License as published by ++ * the Free Software Foundation; either version 2 of the License, or ++ * (at your option) any later version. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++ * GNU General Public License for more details. ++ * ++ * You should have received a copy of the GNU General Public License ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. ++ */ ++ ++/* We must include all headers memops.c includes to avoid trouble with ++ * our namespace game below. ++ */ ++#include <stdio.h> ++#include <string.h> ++#include <math.h> ++#include <memory.h> ++#include <stdlib.h> ++#include <stdint.h> ++#include <limits.h> ++#ifdef __linux__ ++#include <endian.h> ++#endif ++#include "memops.h" ++ ++#if defined (__SSE2__) && !defined (__sun__) ++#include <emmintrin.h> ++#ifdef __SSE4_1__ ++#include <smmintrin.h> ++#endif ++#endif ++ ++#ifdef __ARM_NEON__ ++#include <arm_neon.h> ++#endif ++ ++// our additional headers ++#include <time.h> ++ ++/* Dirty: include memops.c twice, the second time with SIMD disabled ++ * so we can compare accelerated and non-accelerated code ++ */ ++namespace accelerated { ++#include "../common/memops.c" ++} ++ ++namespace origerated { ++#ifdef __SSE2__ ++#undef __SSE2__ ++#endif ++ ++#ifdef __ARM_NEON__ ++#undef __ARM_NEON__ ++#endif ++ ++#include "../common/memops.c" ++} ++ ++// define conversion function types ++typedef void (*t_jack_to_integer)( ++ char *dst, ++ jack_default_audio_sample_t *src, ++ unsigned long nsamples, ++ unsigned long dst_skip, ++ dither_state_t *state); ++ ++typedef void (*t_integer_to_jack)( ++ jack_default_audio_sample_t *dst, ++ char *src, ++ unsigned long nsamples, ++ unsigned long src_skip); ++ ++// define/setup test case data ++typedef struct test_case_data { ++ uint32_t frame_size; ++ uint32_t sample_size; ++ bool reverse; ++ t_jack_to_integer jack_to_integer_accel; ++ t_jack_to_integer jack_to_integer_orig; ++ t_integer_to_jack integer_to_jack_accel; ++ t_integer_to_jack integer_to_jack_orig; ++ dither_state_t *ditherstate; ++ const char *name; ++} test_case_data_t; ++ ++test_case_data_t test_cases[] = { ++ { ++ 
4, ++ 3, ++ true, ++ accelerated::sample_move_d32u24_sSs, ++ origerated::sample_move_d32u24_sSs, ++ accelerated::sample_move_dS_s32u24s, ++ origerated::sample_move_dS_s32u24s, ++ NULL, ++ "32u24s" }, ++ { ++ 4, ++ 3, ++ false, ++ accelerated::sample_move_d32u24_sS, ++ origerated::sample_move_d32u24_sS, ++ accelerated::sample_move_dS_s32u24, ++ origerated::sample_move_dS_s32u24, ++ NULL, ++ "32u24" }, ++ { ++ 3, ++ 3, ++ true, ++ accelerated::sample_move_d24_sSs, ++ origerated::sample_move_d24_sSs, ++ accelerated::sample_move_dS_s24s, ++ origerated::sample_move_dS_s24s, ++ NULL, ++ "24s" }, ++ { ++ 3, ++ 3, ++ false, ++ accelerated::sample_move_d24_sS, ++ origerated::sample_move_d24_sS, ++ accelerated::sample_move_dS_s24, ++ origerated::sample_move_dS_s24, ++ NULL, ++ "24" }, ++ { ++ 2, ++ 2, ++ true, ++ accelerated::sample_move_d16_sSs, ++ origerated::sample_move_d16_sSs, ++ accelerated::sample_move_dS_s16s, ++ origerated::sample_move_dS_s16s, ++ NULL, ++ "16s" }, ++ { ++ 2, ++ 2, ++ false, ++ accelerated::sample_move_d16_sS, ++ origerated::sample_move_d16_sS, ++ accelerated::sample_move_dS_s16, ++ origerated::sample_move_dS_s16, ++ NULL, ++ "16" }, ++}; ++ ++// we need to repeat for better accuracy at time measurement ++const uint32_t retry_per_case = 1000; ++ ++// setup test buffers ++#define TESTBUFF_SIZE 1024 ++jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE]; ++// integer buffers: max 4 bytes per value / * 2 for stereo ++char integerbuffer_accel[TESTBUFF_SIZE*4*2]; ++char integerbuffer_orig[TESTBUFF_SIZE*4*2]; ++// float buffers ++jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE]; ++jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE]; ++ ++// comparing unsigned makes life easier ++uint32_t extract_integer( ++ char* buff, ++ uint32_t offset, ++ uint32_t frame_size, ++ uint32_t sample_size, ++ bool big_endian) ++{ ++ uint32_t retval = 0; ++ unsigned char* curr; ++ uint32_t mult = 1; ++ if(big_endian) { ++ curr = (unsigned 
char*)buff + offset + sample_size-1; ++ for(uint32_t i=0; i<sample_size; i++) { ++ retval += *(curr--) * mult; ++ mult*=256; ++ } ++ } ++ else { ++ curr = (unsigned char*)buff + offset + frame_size-sample_size; ++ for(uint32_t i=0; i<sample_size; i++) { ++ retval += *(curr++) * mult; ++ mult*=256; ++ } ++ } ++ return retval; ++} ++ ++int main(int argc, char *argv[]) ++{ ++// parse_arguments(argc, argv); ++ uint32_t maxerr_displayed = 10; ++ ++ // fill jackbuffer ++ for(int i=0; i<TESTBUFF_SIZE; i++) { ++ // ramp ++ jack_default_audio_sample_t value = ++ ((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2); ++ // force clipping ++ value *= 1.02; ++ jackbuffer_source[i] = value; ++ } ++ ++ for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) { ++ // test mono/stereo ++ for(uint32_t channels=1; channels<=2; channels++) { ++ ////////////////////////////////////////////////////////////////////////////// ++ // jackfloat -> integer ++ ++ // clean target buffers ++ memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel)); ++ memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig)); ++ // accel ++ clock_t time_to_integer_accel = clock(); ++ for(uint32_t repetition=0; repetition<retry_per_case; repetition++) ++ { ++ test_cases[testcase].jack_to_integer_accel( ++ integerbuffer_accel, ++ jackbuffer_source, ++ TESTBUFF_SIZE, ++ test_cases[testcase].frame_size*channels, ++ test_cases[testcase].ditherstate); ++ } ++ float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC; ++ // orig ++ clock_t time_to_integer_orig = clock(); ++ for(uint32_t repetition=0; repetition<retry_per_case; repetition++) ++ { ++ test_cases[testcase].jack_to_integer_orig( ++ integerbuffer_orig, ++ jackbuffer_source, ++ TESTBUFF_SIZE, ++ test_cases[testcase].frame_size*channels, ++ test_cases[testcase].ditherstate); ++ } ++ float timediff_to_integer_orig = ((float)(clock() - 
time_to_integer_orig)) / CLOCKS_PER_SEC; ++ // output performance results ++ printf( ++ "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n", ++ test_cases[testcase].name, ++ channels, ++ timediff_to_integer_orig, ++ timediff_to_integer_accel, ++ (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0); ++ uint32_t int_deviation_max = 0; ++ uint32_t int_error_count = 0; ++ // output error (avoid spam -> limit error lines per test case) ++ for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) { ++ uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels; ++ // compare both results ++ uint32_t intval_accel=extract_integer( ++ integerbuffer_accel, ++ sample_offset, ++ test_cases[testcase].frame_size, ++ test_cases[testcase].sample_size, ++#if __BYTE_ORDER == __BIG_ENDIAN ++ !test_cases[testcase].reverse); ++#else ++ test_cases[testcase].reverse); ++#endif ++ uint32_t intval_orig=extract_integer( ++ integerbuffer_orig, ++ sample_offset, ++ test_cases[testcase].frame_size, ++ test_cases[testcase].sample_size, ++#if __BYTE_ORDER == __BIG_ENDIAN ++ !test_cases[testcase].reverse); ++#else ++ test_cases[testcase].reverse); ++#endif ++ if(intval_accel != intval_orig) { ++ if(int_error_count<maxerr_displayed) { ++ printf("Value error sample %u:", sample); ++ printf(" Orig 0x"); ++ char formatstr[10]; ++ sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2); ++ printf(formatstr, intval_orig); ++ printf(" Accel 0x"); ++ printf(formatstr, intval_accel); ++ printf("\n"); ++ } ++ int_error_count++; ++ uint32_t int_deviation; ++ if(intval_accel > intval_orig) ++ int_deviation = intval_accel-intval_orig; ++ else ++ int_deviation = intval_orig-intval_accel; ++ if(int_deviation > int_deviation_max) ++ int_deviation_max = int_deviation; ++ } ++ } ++ printf( ++ "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n", ++ test_cases[testcase].name, ++ channels, ++ int_error_count, ++ int_deviation_max); ++ ++ 
////////////////////////////////////////////////////////////////////////////// ++ // integer -> jackfloat ++ ++ // clean target buffers ++ memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel)); ++ memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig)); ++ // accel ++ clock_t time_to_float_accel = clock(); ++ for(uint32_t repetition=0; repetition<retry_per_case; repetition++) ++ { ++ test_cases[testcase].integer_to_jack_accel( ++ jackfloatbuffer_accel, ++ integerbuffer_orig, ++ TESTBUFF_SIZE, ++ test_cases[testcase].frame_size*channels); ++ } ++ float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC; ++ // orig ++ clock_t time_to_float_orig = clock(); ++ for(uint32_t repetition=0; repetition<retry_per_case; repetition++) ++ { ++ test_cases[testcase].integer_to_jack_orig( ++ jackfloatbuffer_orig, ++ integerbuffer_orig, ++ TESTBUFF_SIZE, ++ test_cases[testcase].frame_size*channels); ++ } ++ float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC; ++ // output performance results ++ printf( ++ "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n", ++ test_cases[testcase].name, ++ channels, ++ timediff_to_float_orig, ++ timediff_to_float_accel, ++ (timediff_to_float_orig/timediff_to_float_accel-1)*100.0); ++ jack_default_audio_sample_t float_deviation_max = 0.0; ++ uint32_t float_error_count = 0; ++ // output error (avoid spam -> limit error lines per test case) ++ for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) { ++ // For easier estimation/readabilty we scale floats back to integer ++ jack_default_audio_sample_t sample_scaling; ++ switch(test_cases[testcase].sample_size) { ++ case 2: ++ sample_scaling = SAMPLE_16BIT_SCALING; ++ break; ++ default: ++ sample_scaling = SAMPLE_24BIT_SCALING; ++ break; ++ } ++ jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling; ++ jack_default_audio_sample_t floatval_orig = 
jackfloatbuffer_orig[sample] * sample_scaling; ++ // compare both results ++ jack_default_audio_sample_t float_deviation; ++ if(floatval_accel > floatval_orig) ++ float_deviation = floatval_accel-floatval_orig; ++ else ++ float_deviation = floatval_orig-floatval_accel; ++ if(float_deviation > float_deviation_max) ++ float_deviation_max = float_deviation; ++ // deviation > half bit => error ++ if(float_deviation > 0.5) { ++ if(float_error_count<maxerr_displayed) { ++ printf("Value error sample %u:", sample); ++ printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel); ++ } ++ float_error_count++; ++ } ++ } ++ printf( ++ "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n", ++ test_cases[testcase].name, ++ channels, ++ float_error_count, ++ float_deviation_max); ++ ++ printf("\n"); ++ } ++ } ++ return 0; ++} +diff --git a/example-clients/wscript b/example-clients/wscript +index ba67614..1b2f674 100644 +--- a/example-clients/wscript ++++ b/example-clients/wscript +@@ -28,7 +28,8 @@ example_programs = { + 'jack_net_master' : 'netmaster.c', + 'jack_latent_client' : 'latent_client.c', + 'jack_midi_dump' : 'midi_dump.c', +- 'jack_midi_latency_test' : 'midi_latency_test.c' ++ 'jack_midi_latency_test' : 'midi_latency_test.c', ++ 'jack_simdtests' : 'simdtests.cpp' + } + + example_libs = { +-- +2.5.5 + |