diff options
Diffstat (limited to 'meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch')
-rw-r--r-- | meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch b/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch new file mode 100644 index 0000000000..7c67001cc5 --- /dev/null +++ b/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0007-ARM-NEON-Standard-fast-path-src_n_8_8.patch @@ -0,0 +1,118 @@ +From 34ce640914e06f2e23a0a93a3a49ec0bfff7497b Mon Sep 17 00:00:00 2001 +From: Taekyun Kim <tkq.kim@samsung.com> +Date: Mon, 26 Sep 2011 18:33:27 +0900 +Subject: [PATCH 7/8] ARM: NEON: Standard fast path src_n_8_8 + +Performance numbers of before/after on cortex-a8 @ 1GHz + +- before +L1: 28.05 L2: 28.26 M: 26.97 ( 4.48%) HT: 19.79 VT: 19.14 R: 17.61 RT: 9.88 ( 101Kops/s) + +- after +L1:1430.28 L2:1252.10 M:421.93 ( 75.48%) HT:170.16 VT:138.03 R:145.86 RT: 35.51 ( 255Kops/s) +--- + pixman/pixman-arm-neon-asm.S | 66 ++++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 3 ++ + 2 files changed, 69 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 1db02db..da8f054 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1292,6 +1292,72 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_src_n_8_8_process_pixblock_head ++ vmull.u8 q0, d24, d16 ++ vmull.u8 q1, d25, d16 ++ vmull.u8 q2, d26, d16 ++ vmull.u8 q3, d27, d16 ++ vrsra.u16 q0, q0, #8 ++ vrsra.u16 q1, q1, #8 ++ vrsra.u16 q2, q2, #8 ++ vrsra.u16 q3, q3, #8 ++.endm ++ ++.macro pixman_composite_src_n_8_8_process_pixblock_tail ++ vrshrn.u16 d28, q0, #8 ++ vrshrn.u16 d29, q1, #8 ++ vrshrn.u16 d30, q2, #8 ++ vrshrn.u16 d31, q3, #8 ++.endm ++ ++.macro pixman_composite_src_n_8_8_process_pixblock_tail_head ++ fetch_mask_pixblock ++ PF add PF_X, PF_X, #8 ++ vrshrn.u16 d28, q0, #8 ++ PF tst PF_CTL, #0x0F ++ vrshrn.u16 d29, q1, #8 ++ PF addne PF_X, PF_X, #8 ++ vrshrn.u16 d30, q2, #8 ++ PF subne PF_CTL, PF_CTL, #1 ++ vrshrn.u16 d31, q3, #8 ++ PF cmp PF_X, ORIG_W ++ vmull.u8 q0, d24, d16 ++ PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift] ++ vmull.u8 q1, d25, d16 ++ PF subge PF_X, PF_X, ORIG_W ++ vmull.u8 q2, d26, d16 ++ PF subges PF_CTL, PF_CTL, #0x10 ++ vmull.u8 q3, d27, d16 ++ PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]! ++ vst1.8 {d28, d29, d30, d31}, [DST_W, :128]! ++ vrsra.u16 q0, q0, #8 ++ vrsra.u16 q1, q1, #8 ++ vrsra.u16 q2, q2, #8 ++ vrsra.u16 q3, q3, #8 ++.endm ++ ++.macro pixman_composite_src_n_8_8_init ++ add DUMMY, sp, #ARGS_STACK_OFFSET ++ vld1.32 {d16[0]}, [DUMMY] ++ vdup.8 d16, d16[3] ++.endm ++ ++.macro pixman_composite_src_n_8_8_cleanup ++.endm ++ ++generate_composite_function \ ++ pixman_composite_src_n_8_8_asm_neon, 0, 8, 8, \ ++ FLAG_DST_WRITEONLY, \ ++ 32, /* number of pixels, processed in a single block */ \ ++ 5, /* prefetch distance */ \ ++ pixman_composite_src_n_8_8_init, \ ++ pixman_composite_src_n_8_8_cleanup, \ ++ pixman_composite_src_n_8_8_process_pixblock_head, \ ++ pixman_composite_src_n_8_8_process_pixblock_tail, \ ++ pixman_composite_src_n_8_8_process_pixblock_tail_head ++ ++/******************************************************************************/ ++ + .macro pixman_composite_over_n_8_8888_process_pixblock_head + /* expecting deinterleaved source data in {d8, d9, d10, d11} */ + /* d8 - blue, d9 - green, d10 - red, d11 - alpha */ +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 3db9adf..ca139de 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -92,6 +92,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8888, + uint8_t, 1, uint32_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8888, + uint8_t, 1, uint32_t, 1) ++PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (0, neon, src_n_8_8, ++ uint8_t, 1, uint8_t, 1) + + PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, neon, over_8888_n_8888, + uint32_t, 1, uint32_t, 1) +@@ -295,6 +297,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, neon_composite_src_n_8_8888), + PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, neon_composite_src_n_8_8888), ++ PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8, neon_composite_src_n_8_8), + + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8, neon_composite_over_n_8_8), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, neon_composite_over_n_8_0565), +-- +1.6.6.1 + |