diff options
Diffstat (limited to 'recipes')
14 files changed, 448 insertions, 470 deletions
diff --git a/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch b/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch deleted file mode 100644 index 178dad99bc..0000000000 --- a/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch +++ /dev/null @@ -1,59 +0,0 @@ -From e3bfd272cf813b8419757a3b59128b3568e5f800 Mon Sep 17 00:00:00 2001 -From: Siarhei Siamashka <siarhei.siamashka@nokia.com> -Date: Mon, 20 Sep 2010 19:07:33 +0300 -Subject: [PATCH 2/9] Don't discriminate PAD and REFLECT repeat in standard fast paths - -Without this fix, setting PAD repeat on a source image prevents -the use of any nonscaled standard fast paths, affecting performance -a lot. But as long as no pixels outside the source image boundaries -are touched by the compositing operation, all the repeat types -behave the same and can take the same fast paths. - -This is important because setting PAD repeat instead of NONE is -more hardware acceleration friendly (for the drivers implementing -RENDER extension) and does not inhibit OVER->SRC operator -optimization in pixman. ---- - pixman/pixman-image.c | 6 ++---- - pixman/pixman-private.h | 2 -- - 2 files changed, 2 insertions(+), 6 deletions(-) - -diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c -index 8397f6a..14a2219 100644 ---- a/pixman/pixman-image.c -+++ b/pixman/pixman-image.c -@@ -363,16 +363,14 @@ compute_image_info (pixman_image_t *image) - flags |= - FAST_PATH_NO_PAD_REPEAT | - FAST_PATH_NO_NONE_REPEAT | -- FAST_PATH_NO_NORMAL_REPEAT | -- FAST_PATH_COVERS_CLIP; -+ FAST_PATH_NO_NORMAL_REPEAT; - break; - - case PIXMAN_REPEAT_PAD: - flags |= - FAST_PATH_NO_REFLECT_REPEAT | - FAST_PATH_NO_NONE_REPEAT | -- FAST_PATH_NO_NORMAL_REPEAT | -- FAST_PATH_COVERS_CLIP; -+ FAST_PATH_NO_NORMAL_REPEAT; - break; - - default: -diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h -index 564f8f0..440ae7a 100644 ---- a/pixman/pixman-private.h -+++ b/pixman/pixman-private.h -@@ -602,8 +602,6 @@ _pixman_choose_implementation (void); - (FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_NO_ALPHA_MAP | \ - FAST_PATH_NO_CONVOLUTION_FILTER | \ -- FAST_PATH_NO_PAD_REPEAT | \ -- FAST_PATH_NO_REFLECT_REPEAT | \ - FAST_PATH_NO_ACCESSORS | \ - FAST_PATH_NARROW_FORMAT | \ - FAST_PATH_COVERS_CLIP) --- -1.6.6.1 - diff --git a/recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch b/recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch deleted file mode 100644 index d62f12dd5c..0000000000 --- a/recipes/xorg-lib/pixman-0.19.4/0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch +++ /dev/null @@ -1,96 +0,0 @@ -From c3f1715c4698e90396d02f7b5acc314d99780941 Mon Sep 17 00:00:00 2001 -From: Siarhei Siamashka <siarhei.siamashka@nokia.com> -Date: Thu, 23 Sep 2010 22:28:55 +0300 -Subject: [PATCH 7/9] ARM: added 'neon_composite_add_0565_8_0565' fast path - -TODO: That's an initial variant, needs performance tuning ---- - pixman/pixman-arm-neon-asm.S | 52 ++++++++++++++++++++++++++++++++++++++++++ - pixman/pixman-arm-neon.c | 4 +++ - 2 files changed, 56 insertions(+), 0 deletions(-) - -diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S -index e1a697e..23ddae9 100644 ---- a/pixman/pixman-arm-neon-asm.S -+++ b/pixman/pixman-arm-neon-asm.S -@@ -1890,3 +1890,55 @@ generate_composite_function \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ -+ -+/******************************************************************************/ -+ -+.macro pixman_composite_add_0565_8_0565_process_pixblock_head -+ /* mask is in d15 */ -+ convert_0565_to_x888 q4, d2, d1, d0 -+ convert_0565_to_x888 q5, d6, d5, d4 -+ /* source pixel data is in {d0, d1, d2, XX} */ -+ /* destination pixel data is in {d4, d5, d6, XX} */ -+ vmull.u8 q6, d15, d2 -+ vmull.u8 q5, d15, d1 -+ vmull.u8 q4, d15, d0 -+ vrshr.u16 q12, q6, #8 -+ vrshr.u16 q11, q5, #8 -+ vrshr.u16 q10, q4, #8 -+ vraddhn.u16 d2, q6, q12 -+ vraddhn.u16 d1, q5, q11 -+ vraddhn.u16 d0, q4, q10 -+.endm -+ -+.macro pixman_composite_add_0565_8_0565_process_pixblock_tail -+ vqadd.u8 q0, q0, q2 -+ vqadd.u8 q1, q1, q3 -+ /* 32bpp result is in {d0, d1, d2, XX} */ -+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -+.endm -+ -+/* TODO: expand macros and do better instructions scheduling */ -+.macro pixman_composite_add_0565_8_0565_process_pixblock_tail_head -+ vld1.8 {d15}, [MASK]! -+ pixman_composite_add_0565_8_0565_process_pixblock_tail -+ vld1.16 {d8, d9}, [SRC]! -+ vld1.16 {d10, d11}, [DST_R, :128]! -+ cache_preload 8, 8 -+ pixman_composite_add_0565_8_0565_process_pixblock_head -+ vst1.16 {d28, d29}, [DST_W, :128]! -+.endm -+ -+generate_composite_function \ -+ pixman_composite_add_0565_8_0565_asm_neon, 16, 8, 16, \ -+ FLAG_DST_READWRITE, \ -+ 8, /* number of pixels, processed in a single block */ \ -+ 5, /* prefetch distance */ \ -+ default_init_need_all_regs, \ -+ default_cleanup_need_all_regs, \ -+ pixman_composite_add_0565_8_0565_process_pixblock_head, \ -+ pixman_composite_add_0565_8_0565_process_pixblock_tail, \ -+ pixman_composite_add_0565_8_0565_process_pixblock_tail_head, \ -+ 28, /* dst_w_basereg */ \ -+ 10, /* dst_r_basereg */ \ -+ 8, /* src_basereg */ \ -+ 15 /* mask_basereg */ -diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index db1c2df..a8be7e4 100644 ---- a/pixman/pixman-arm-neon.c -+++ b/pixman/pixman-arm-neon.c -@@ -82,6 +82,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888, - - PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8, - uint8_t, 1, uint8_t, 1, uint8_t, 1) -+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_0565_8_0565, -+ uint16_t, 1, uint8_t, 1, uint16_t, 1) - PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888, - uint32_t, 1, uint32_t, 1, uint32_t, 1) - PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888, -@@ -296,6 +298,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = - PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, a8b8g8r8, neon_composite_src_x888_8888), - PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, neon_composite_add_n_8_8), - PIXMAN_STD_FAST_PATH (ADD, a8, a8, a8, neon_composite_add_8_8_8), -+ PIXMAN_STD_FAST_PATH (ADD, r5g6b5, a8, r5g6b5, neon_composite_add_0565_8_0565), -+ PIXMAN_STD_FAST_PATH (ADD, b5g6r5, a8, b5g6r5, neon_composite_add_0565_8_0565), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_add_8888_8888_8888), - PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, neon_composite_add_8000_8000), - PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, neon_composite_add_8888_8888), --- -1.6.6.1 - diff --git a/recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch b/recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch deleted file mode 100644 index e720a6a428..0000000000 --- a/recipes/xorg-lib/pixman-0.19.4/0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 89cbe6eb5df2a1c85ba996caea6479e2434d51a5 Mon Sep 17 00:00:00 2001 -From: Siarhei Siamashka <siarhei.siamashka@nokia.com> -Date: Thu, 23 Sep 2010 23:09:46 +0300 -Subject: [PATCH 8/9] ARM: added 'neon_composite_out_reverse_0565_8_0565' fast path - -TODO: That's an initial variant, needs performance tuning ---- - pixman/pixman-arm-neon-asm.S | 66 ++++++++++++++++++++++++++++++++++++++++++ - pixman/pixman-arm-neon.c | 4 ++ - 2 files changed, 70 insertions(+), 0 deletions(-) - -diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S -index 23ddae9..7a599f0 100644 ---- a/pixman/pixman-arm-neon-asm.S -+++ b/pixman/pixman-arm-neon-asm.S -@@ -1942,3 +1942,69 @@ generate_composite_function \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ -+ -+/******************************************************************************/ -+ -+.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_head -+ /* mask is in d15 */ -+ convert_0565_to_8888 q4, d3, d2, d1, d0 -+ convert_0565_to_x888 q5, d6, d5, d4 -+ /* source pixel data is in {d0, d1, d2, d3} */ -+ /* destination pixel data is in {d4, d5, d6, xx} */ -+ -+ /* 'in' */ -+ vmull.u8 q8, d15, d3 -+ vmull.u8 q6, d15, d2 -+ vmull.u8 q5, d15, d1 -+ vmull.u8 q4, d15, d0 -+ vrshr.u16 q13, q8, #8 -+ vrshr.u16 q12, q6, #8 -+ vrshr.u16 q11, q5, #8 -+ vrshr.u16 q10, q4, #8 -+ vraddhn.u16 d3, q8, q13 -+ vraddhn.u16 d2, q6, q12 -+ vraddhn.u16 d1, q5, q11 -+ vraddhn.u16 d0, q4, q10 -+ vmvn.8 d24, d3 /* get inverted alpha */ -+ /* now do alpha blending */ -+ vmull.u8 q8, d24, d4 -+ vmull.u8 q9, d24, d5 -+ vmull.u8 q10, d24, d6 -+.endm -+ -+.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail -+ vrshr.u16 q14, q8, #8 -+ vrshr.u16 q15, q9, #8 -+ vrshr.u16 q12, q10, #8 -+ vraddhn.u16 d0, q14, q8 -+ vraddhn.u16 d1, q15, q9 -+ vraddhn.u16 d2, q12, q10 -+ /* 32bpp result is in {d0, d1, d2, XX} */ -+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -+.endm -+ -+/* TODO: expand macros and do better instructions scheduling */ -+.macro pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail_head -+ vld1.8 {d15}, [MASK]! -+ pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail -+ vld1.16 {d8, d9}, [SRC]! -+ vld1.16 {d10, d11}, [DST_R, :128]! -+ cache_preload 8, 8 -+ pixman_composite_out_reverse_0565_8_0565_process_pixblock_head -+ vst1.16 {d28, d29}, [DST_W, :128]! -+.endm -+ -+generate_composite_function \ -+ pixman_composite_out_reverse_0565_8_0565_asm_neon, 16, 8, 16, \ -+ FLAG_DST_READWRITE, \ -+ 8, /* number of pixels, processed in a single block */ \ -+ 5, /* prefetch distance */ \ -+ default_init_need_all_regs, \ -+ default_cleanup_need_all_regs, \ -+ pixman_composite_out_reverse_0565_8_0565_process_pixblock_head, \ -+ pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail, \ -+ pixman_composite_out_reverse_0565_8_0565_process_pixblock_tail_head, \ -+ 28, /* dst_w_basereg */ \ -+ 10, /* dst_r_basereg */ \ -+ 8, /* src_basereg */ \ -+ 15 /* mask_basereg */ -diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index a8be7e4..da1fdeb 100644 ---- a/pixman/pixman-arm-neon.c -+++ b/pixman/pixman-arm-neon.c -@@ -94,6 +94,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565, - uint32_t, 1, uint8_t, 1, uint16_t, 1) - PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565, - uint16_t, 1, uint8_t, 1, uint16_t, 1) -+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, out_reverse_0565_8_0565, -+ uint16_t, 1, uint8_t, 1, uint16_t, 1) - - void - pixman_composite_src_n_8_asm_neon (int32_t w, -@@ -306,6 +308,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = - PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, neon_composite_add_8888_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, neon_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), -+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, r5g6b5, a8, r5g6b5, neon_composite_out_reverse_0565_8_0565), -+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, b5g6r5, a8, b5g6r5, neon_composite_out_reverse_0565_8_0565), - - { PIXMAN_OP_NONE }, - }; --- -1.6.6.1 - diff --git a/recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch b/recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch deleted file mode 100644 index 1c7f6ec81d..0000000000 --- a/recipes/xorg-lib/pixman-0.19.4/0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch +++ /dev/null @@ -1,94 +0,0 @@ -From 130211983628678ccee23535765994aa0b9d5122 Mon Sep 17 00:00:00 2001 -From: Siarhei Siamashka <siarhei.siamashka@nokia.com> -Date: Thu, 23 Sep 2010 23:41:50 +0300 -Subject: [PATCH 9/9] ARM: added 'neon_composite_out_reverse_8_0565' fast path - -TODO: That's an initial variant, needs performance tuning ---- - pixman/pixman-arm-neon-asm.S | 50 ++++++++++++++++++++++++++++++++++++++++++ - pixman/pixman-arm-neon.c | 4 +++ - 2 files changed, 54 insertions(+), 0 deletions(-) - -diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S -index 7a599f0..2234b1b 100644 ---- a/pixman/pixman-arm-neon-asm.S -+++ b/pixman/pixman-arm-neon-asm.S -@@ -2008,3 +2008,53 @@ generate_composite_function \ - 10, /* dst_r_basereg */ \ - 8, /* src_basereg */ \ - 15 /* mask_basereg */ -+ -+/******************************************************************************/ -+ -+.macro pixman_composite_out_reverse_8_0565_process_pixblock_head -+ /* mask is in d15 */ -+ convert_0565_to_x888 q5, d6, d5, d4 -+ /* destination pixel data is in {d4, d5, d6, xx} */ -+ vmvn.8 d24, d15 /* get inverted alpha */ -+ /* now do alpha blending */ -+ vmull.u8 q8, d24, d4 -+ vmull.u8 q9, d24, d5 -+ vmull.u8 q10, d24, d6 -+.endm -+ -+.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail -+ vrshr.u16 q14, q8, #8 -+ vrshr.u16 q15, q9, #8 -+ vrshr.u16 q12, q10, #8 -+ vraddhn.u16 d0, q14, q8 -+ vraddhn.u16 d1, q15, q9 -+ vraddhn.u16 d2, q12, q10 -+ /* 32bpp result is in {d0, d1, d2, XX} */ -+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3 -+.endm -+ -+/* TODO: expand macros and do better instructions scheduling */ -+.macro pixman_composite_out_reverse_8_0565_process_pixblock_tail_head -+ vld1.8 {d15}, [SRC]! -+ pixman_composite_out_reverse_8_0565_process_pixblock_tail -+ vld1.16 {d10, d11}, [DST_R, :128]! -+ cache_preload 8, 8 -+ pixman_composite_out_reverse_8_0565_process_pixblock_head -+ vst1.16 {d28, d29}, [DST_W, :128]! -+.endm -+ -+generate_composite_function \ -+ pixman_composite_out_reverse_8_0565_asm_neon, 8, 0, 16, \ -+ FLAG_DST_READWRITE, \ -+ 8, /* number of pixels, processed in a single block */ \ -+ 5, /* prefetch distance */ \ -+ default_init_need_all_regs, \ -+ default_cleanup_need_all_regs, \ -+ pixman_composite_out_reverse_8_0565_process_pixblock_head, \ -+ pixman_composite_out_reverse_8_0565_process_pixblock_tail, \ -+ pixman_composite_out_reverse_8_0565_process_pixblock_tail_head, \ -+ 28, /* dst_w_basereg */ \ -+ 10, /* dst_r_basereg */ \ -+ 15, /* src_basereg */ \ -+ 0 /* mask_basereg */ -+ -diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index da1fdeb..2e37a4a 100644 ---- a/pixman/pixman-arm-neon.c -+++ b/pixman/pixman-arm-neon.c -@@ -60,6 +60,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565, - uint32_t, 1, uint16_t, 1) - PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888, - uint32_t, 1, uint32_t, 1) -+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, out_reverse_8_0565, -+ uint8_t, 1, uint16_t, 1) - - PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565, - uint16_t, 1) -@@ -310,6 +312,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] = - PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, neon_composite_over_reverse_n_8888), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, r5g6b5, a8, r5g6b5, neon_composite_out_reverse_0565_8_0565), - PIXMAN_STD_FAST_PATH (OUT_REVERSE, b5g6r5, a8, b5g6r5, neon_composite_out_reverse_0565_8_0565), -+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, r5g6b5, neon_composite_out_reverse_8_0565), -+ PIXMAN_STD_FAST_PATH (OUT_REVERSE, a8, null, b5g6r5, neon_composite_out_reverse_8_0565), - - { PIXMAN_OP_NONE }, - }; --- -1.6.6.1 - diff --git a/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman-0.19.6/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch index ca63cadc4b..795cce5bad 100644 --- a/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch +++ b/recipes/xorg-lib/pixman-0.19.6/0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch @@ -1,7 +1,7 @@ -From c5099dabb417cab343185d6e22ae4925e53a756f Mon Sep 17 00:00:00 2001 +From 97b2bb933455f222b392b5c60a8bde82d7d6329f Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Tue, 16 Mar 2010 16:55:28 +0100 -Subject: [PATCH 3/9] Generic C implementation of pixman_blt with overlapping support +Subject: [PATCH 1/8] Generic C implementation of pixman_blt with overlapping support Uses memcpy/memmove functions to copy pixels, can handle the case when both source and destination areas are in the same @@ -51,7 +51,7 @@ index 4d234a0..c4d2c14 100644 static pixman_bool_t diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h -index 440ae7a..aac2067 100644 +index c43172b..f980454 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -10,6 +10,7 @@ @@ -62,7 +62,7 @@ index 440ae7a..aac2067 100644 #include <assert.h> #include <stdio.h> #include <string.h> -@@ -883,4 +884,46 @@ void pixman_timer_register (pixman_timer_t *timer); +@@ -873,4 +874,46 @@ void pixman_timer_register (pixman_timer_t *timer); #endif /* PIXMAN_TIMERS */ diff --git a/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman-0.19.6/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch index b8323831e8..6e2d492aaf 100644 --- a/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch +++ b/recipes/xorg-lib/pixman-0.19.6/0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch @@ -1,14 +1,14 @@ -From f8c3deb1f7a26992fe217d1748a1fa5c832bbbd2 Mon Sep 17 00:00:00 2001 +From 47b31f936641da07431093ede340465625bfcb3d Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Thu, 22 Oct 2009 05:45:47 +0300 -Subject: [PATCH 4/9] Support of overlapping src/dst for pixman_blt_mmx +Subject: [PATCH 2/8] Support of overlapping src/dst for pixman_blt_mmx --- pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++--------------------- 1 files changed, 32 insertions(+), 23 deletions(-) diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c -index b284cd3..5b6afaa 100644 +index e936c4c..2413197 100644 --- a/pixman/pixman-mmx.c +++ b/pixman/pixman-mmx.c @@ -2996,34 +2996,43 @@ pixman_blt_mmx (uint32_t *src_bits, diff --git a/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman-0.19.6/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch index 71b5fded02..910f62e6dd 100644 --- a/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch +++ b/recipes/xorg-lib/pixman-0.19.6/0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch @@ -1,17 +1,17 @@ -From 79fe7f347fe396aa2c917a1928fc18ab9321336c Mon Sep 17 00:00:00 2001 +From 13be027637602fffda3b3cb6e171d8d6a67b3b4b Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Thu, 22 Oct 2009 05:45:54 +0300 -Subject: [PATCH 5/9] Support of overlapping src/dst for pixman_blt_sse2 +Subject: [PATCH 3/8] Support of overlapping src/dst for pixman_blt_sse2 --- pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++-------------------- 1 files changed, 32 insertions(+), 23 deletions(-) diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c -index 33d71ee..dcd6dcd 100644 +index 5907de0..25015ae 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c -@@ -5614,34 +5614,43 @@ pixman_blt_sse2 (uint32_t *src_bits, +@@ -5027,34 +5027,43 @@ pixman_blt_sse2 (uint32_t *src_bits, { uint8_t * src_bytes; uint8_t * dst_bytes; @@ -76,16 +76,16 @@ index 33d71ee..dcd6dcd 100644 + } } - cache_prefetch ((__m128i*)src_bytes); -@@ -5654,7 +5663,7 @@ pixman_blt_sse2 (uint32_t *src_bits, + while (height--) +@@ -5064,7 +5073,7 @@ pixman_blt_sse2 (uint32_t *src_bits, uint8_t *d = dst_bytes; src_bytes += src_stride; dst_bytes += dst_stride; - w = byte_width; + w = width; - cache_prefetch_next ((__m128i*)s); - cache_prefetch_next ((__m128i*)d); + while (w >= 2 && ((unsigned long)d & 3)) + { -- 1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman-0.19.6/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch index 8992c05697..f6aa9792b8 100644 --- a/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch +++ b/recipes/xorg-lib/pixman-0.19.6/0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch @@ -1,17 +1,17 @@ -From ea0f7b1ae605bb57ca23e88b38b9c19390596723 Mon Sep 17 00:00:00 2001 +From a913cc05a1a1c5a813cf06d248334edede9caab7 Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Wed, 18 Nov 2009 06:08:48 +0200 -Subject: [PATCH 6/9] Support of overlapping src/dst for pixman_blt_neon +Subject: [PATCH 4/8] Support of overlapping src/dst for pixman_blt_neon --- pixman/pixman-arm-neon.c | 62 +++++++++++++++++++++++++++++++++++++-------- 1 files changed, 51 insertions(+), 11 deletions(-) diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index e0d2001..db1c2df 100644 +index be5d403..cbfd7cf 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c -@@ -172,26 +172,66 @@ pixman_blt_neon (uint32_t *src_bits, +@@ -176,26 +176,66 @@ pixman_blt_neon (uint32_t *src_bits, int width, int height) { diff --git a/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch b/recipes/xorg-lib/pixman-0.19.6/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch index 2ff71ae2d8..dbe98b38ce 100644 --- a/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch +++ b/recipes/xorg-lib/pixman-0.19.6/0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch @@ -1,61 +1,18 @@ -From 38aabb3be87ea68e37f34256c778d07f62680ec6 Mon Sep 17 00:00:00 2001 +From f75e9d1868e21dd75ff3a2ca3561546d23877ddb Mon Sep 17 00:00:00 2001 From: Siarhei Siamashka <siarhei.siamashka@nokia.com> Date: Thu, 10 Dec 2009 00:51:50 +0200 -Subject: [PATCH 1/9] ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline +Subject: [PATCH 5/8] ARM: added NEON optimizations for fetch/store r5g6b5 scanline --- - pixman/pixman-access.c | 23 ++++++++++++++++++++++- pixman/pixman-arm-neon-asm.S | 20 ++++++++++++++++++++ pixman/pixman-arm-neon.c | 40 ++++++++++++++++++++++++++++++++++++++++ - pixman/pixman-private.h | 5 +++++ - 4 files changed, 87 insertions(+), 1 deletions(-) + 2 files changed, 60 insertions(+), 0 deletions(-) -diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c -index f1ce0ba..b33da29 100644 ---- a/pixman/pixman-access.c -+++ b/pixman/pixman-access.c -@@ -2836,7 +2836,7 @@ typedef struct - store_scanline_ ## format, store_scanline_generic_64 \ - } - --static const format_info_t accessors[] = -+static format_info_t accessors[] = - { - /* 32 bpp formats */ - FORMAT_INFO (a8r8g8b8), -@@ -2978,6 +2978,27 @@ _pixman_bits_image_setup_accessors (bits_image_t *image) - setup_accessors (image); - } - -+void -+_pixman_bits_override_accessors (pixman_format_code_t format, -+ fetch_scanline_t fetch_func, -+ store_scanline_t store_func) -+{ -+ format_info_t *info = accessors; -+ -+ while (info->format != PIXMAN_null) -+ { -+ if (info->format == format) -+ { -+ if (fetch_func) -+ info->fetch_scanline_32 = fetch_func; -+ if (store_func) -+ info->store_scanline_32 = store_func; -+ return; -+ } -+ info++; -+ } -+} -+ - #else - - void diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S -index 9f6568f..e1a697e 100644 +index e4db5cd..c79ba81 100644 --- a/pixman/pixman-arm-neon-asm.S +++ b/pixman/pixman-arm-neon-asm.S -@@ -458,6 +458,16 @@ generate_composite_function \ +@@ -459,6 +459,16 @@ generate_composite_function \ pixman_composite_src_8888_0565_process_pixblock_tail, \ pixman_composite_src_8888_0565_process_pixblock_tail_head @@ -72,7 +29,7 @@ index 9f6568f..e1a697e 100644 /******************************************************************************/ .macro pixman_composite_src_0565_8888_process_pixblock_head -@@ -493,6 +503,16 @@ generate_composite_function \ +@@ -494,6 +504,16 @@ generate_composite_function \ pixman_composite_src_0565_8888_process_pixblock_tail, \ pixman_composite_src_0565_8888_process_pixblock_tail_head @@ -88,12 +45,12 @@ index 9f6568f..e1a697e 100644 + /******************************************************************************/ - .macro pixman_composite_add_8000_8000_process_pixblock_head + .macro pixman_composite_add_8_8_process_pixblock_head diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c -index ece6054..e0d2001 100644 +index cbfd7cf..f88c8f8 100644 --- a/pixman/pixman-arm-neon.c +++ b/pixman/pixman-arm-neon.c -@@ -344,6 +344,42 @@ BIND_COMBINE_U (over) +@@ -392,6 +392,42 @@ BIND_COMBINE_U (over) BIND_COMBINE_U (add) BIND_COMBINE_U (out_reverse) @@ -136,7 +93,7 @@ index ece6054..e0d2001 100644 pixman_implementation_t * _pixman_implementation_create_arm_neon (void) { -@@ -355,6 +391,10 @@ _pixman_implementation_create_arm_neon (void) +@@ -407,6 +443,10 @@ _pixman_implementation_create_arm_neon (void) imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; @@ -147,22 +104,6 @@ index ece6054..e0d2001 100644 imp->blt = arm_neon_blt; imp->fill = arm_neon_fill; -diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h -index d85868f..564f8f0 100644 ---- a/pixman/pixman-private.h -+++ b/pixman/pixman-private.h -@@ -206,6 +206,11 @@ void - _pixman_bits_image_setup_accessors (bits_image_t *image); - - void -+_pixman_bits_override_accessors (pixman_format_code_t format, -+ fetch_scanline_t fetch_func, -+ store_scanline_t store_func); -+ -+void - _pixman_image_get_scanline_generic_64 (pixman_image_t *image, - int x, - int y, -- 1.6.6.1 diff --git a/recipes/xorg-lib/pixman-0.19.6/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch b/recipes/xorg-lib/pixman-0.19.6/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch new file mode 100644 index 0000000000..d050646fa7 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.6/0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch @@ -0,0 +1,148 @@ +From a1cd695c5e22f0f4a2b7272fab675a3cc510bacb Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Thu, 23 Sep 2010 21:10:56 +0300 +Subject: [PATCH 6/8] ARM: added NEON optimizations for fetch/store a8 scanline + +--- + pixman/pixman-arm-neon-asm.S | 64 ++++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-arm-neon.c | 42 +++++++++++++++++++++++++++ + 2 files changed, 106 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index c79ba81..ca0825c 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -418,6 +418,70 @@ generate_composite_function \ + + /******************************************************************************/ + ++.macro pixman_composite_src_8_8888_process_pixblock_head ++ /* This is tricky part: we can't set these values just once in 'init' macro ++ * because leading/trailing pixels handling part uses VZIP.8 instructions, ++ * and they operate on values in-place and destroy original registers ++ * content. Think about it like VST4.8 instruction corrupting NEON ++ * registers after write in 'tail_head' macro. Except that 'tail_head' ++ * macro itself actually does not need these extra VMOVs because it uses ++ * real VST4.8 instruction. ++ */ ++ vmov.u8 q0, #0 ++ vmov.u8 d2, #0 ++.endm ++ ++.macro pixman_composite_src_8_8888_process_pixblock_tail ++.endm ++ ++.macro pixman_composite_src_8_8888_process_pixblock_tail_head ++ vst4.8 {d0, d1, d2, d3}, [DST_W, :128]! ++ vld1.8 {d3}, [SRC]! ++.endm ++ ++generate_composite_function_single_scanline \ ++ pixman_fetch_scanline_a8_asm_neon, 8, 0, 32, \ ++ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ default_init, \ ++ default_cleanup, \ ++ pixman_composite_src_8_8888_process_pixblock_head, \ ++ pixman_composite_src_8_8888_process_pixblock_tail, \ ++ pixman_composite_src_8_8888_process_pixblock_tail_head, \ ++ 0, /* dst_w_basereg */ \ ++ 0, /* dst_r_basereg */ \ ++ 3, /* src_basereg */ \ ++ 0 /* mask_basereg */ ++ ++/******************************************************************************/ ++ ++.macro pixman_composite_src_8888_8_process_pixblock_head ++.endm ++ ++.macro pixman_composite_src_8888_8_process_pixblock_tail ++.endm ++ ++.macro pixman_composite_src_8888_8_process_pixblock_tail_head ++ vst1.8 {d3}, [DST_W, :64]! ++ vld4.8 {d0, d1, d2, d3}, [SRC]! ++.endm ++ ++generate_composite_function_single_scanline \ ++ pixman_store_scanline_a8_asm_neon, 32, 0, 8, \ ++ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ default_init, \ ++ default_cleanup, \ ++ pixman_composite_src_8888_8_process_pixblock_head, \ ++ pixman_composite_src_8888_8_process_pixblock_tail, \ ++ pixman_composite_src_8888_8_process_pixblock_tail_head, \ ++ 3, /* dst_w_basereg */ \ ++ 0, /* dst_r_basereg */ \ ++ 0, /* src_basereg */ \ ++ 0 /* mask_basereg */ ++ ++/******************************************************************************/ ++ + .macro pixman_composite_src_8888_0565_process_pixblock_head + vshll.u8 q8, d1, #8 + vshll.u8 q14, d2, #8 +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index f88c8f8..43091d2 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -428,6 +428,45 @@ neon_store_scanline_r5g6b5 (bits_image_t * image, + pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values); + } + ++void ++pixman_fetch_scanline_a8_asm_neon (int width, ++ uint32_t *buffer, ++ const uint8_t *pixel); ++ ++ ++void ++pixman_store_scanline_a8_asm_neon (int width, ++ uint8_t *pixel, ++ const uint32_t *values); ++ ++static void ++neon_fetch_scanline_a8 (pixman_image_t *image, ++ int x, ++ int y, ++ int width, ++ uint32_t * buffer, ++ const uint32_t *mask) ++{ ++ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; ++ const uint8_t *pixel = (const uint8_t *) bits + x; ++ ++ pixman_fetch_scanline_a8_asm_neon (width, buffer, pixel); ++} ++ ++static void ++neon_store_scanline_a8 (bits_image_t * image, ++ int x, ++ int y, ++ int width, ++ const uint32_t *values) ++{ ++ uint32_t *bits = image->bits + image->rowstride * y; ++ uint8_t *pixel = (uint8_t *) bits + x; ++ ++ pixman_store_scanline_a8_asm_neon (width, pixel, values); ++} ++ ++ + pixman_implementation_t * + _pixman_implementation_create_arm_neon (void) + { +@@ -446,6 +485,9 @@ _pixman_implementation_create_arm_neon (void) + _pixman_bits_override_accessors (PIXMAN_r5g6b5, + neon_fetch_scanline_r5g6b5, + neon_store_scanline_r5g6b5); ++ _pixman_bits_override_accessors (PIXMAN_a8, ++ neon_fetch_scanline_a8, ++ neon_store_scanline_a8); + + imp->blt = arm_neon_blt; + imp->fill = arm_neon_fill; +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.19.6/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch b/recipes/xorg-lib/pixman-0.19.6/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch new file mode 100644 index 0000000000..7f28f47cff --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.6/0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch @@ -0,0 +1,77 @@ +From d6ae7da60cc797900b5eff0786536c4a11ab0f50 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Fri, 24 Sep 2010 18:22:44 +0300 +Subject: [PATCH 7/8] ARM: added NEON optimizations for fetching x8r8g8b8 scanline + +--- + pixman/pixman-arm-neon-asm.S | 14 ++++++++++++++ + pixman/pixman-arm-neon.c | 21 +++++++++++++++++++++ + 2 files changed, 35 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index ca0825c..ffd0b83 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -1206,6 +1206,20 @@ generate_composite_function \ + 0, /* src_basereg */ \ + 0 /* mask_basereg */ + ++generate_composite_function_single_scanline \ ++ pixman_fetch_scanline_x888_asm_neon, 32, 0, 32, \ ++ FLAG_DST_WRITEONLY, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ pixman_composite_src_x888_8888_init, \ ++ default_cleanup, \ ++ pixman_composite_src_x888_8888_process_pixblock_head, \ ++ pixman_composite_src_x888_8888_process_pixblock_tail, \ ++ pixman_composite_src_x888_8888_process_pixblock_tail_head, \ ++ 0, /* dst_w_basereg */ \ ++ 0, /* dst_r_basereg */ \ ++ 0, /* src_basereg */ \ ++ 0 /* mask_basereg */ ++ + /******************************************************************************/ + + .macro pixman_composite_over_n_8_8888_process_pixblock_head +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index 43091d2..f84b5e6 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -466,6 +466,24 @@ neon_store_scanline_a8 (bits_image_t * image, + pixman_store_scanline_a8_asm_neon (width, pixel, values); + } + ++void ++pixman_fetch_scanline_x888_asm_neon (int width, ++ uint32_t *buffer, ++ const uint32_t *pixel); ++ ++static void ++neon_fetch_scanline_x888 (pixman_image_t *image, ++ int x, ++ int y, ++ int width, ++ uint32_t * buffer, ++ const uint32_t *mask) ++{ ++ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; ++ const uint32_t *pixel = (const uint32_t *) bits + x; ++ ++ pixman_fetch_scanline_x888_asm_neon (width, buffer, pixel); ++} + + pixman_implementation_t * + _pixman_implementation_create_arm_neon (void) +@@ -488,6 +506,9 @@ _pixman_implementation_create_arm_neon (void) + _pixman_bits_override_accessors (PIXMAN_a8, + neon_fetch_scanline_a8, + neon_store_scanline_a8); ++ _pixman_bits_override_accessors (PIXMAN_x8r8g8b8, ++ neon_fetch_scanline_x888, ++ NULL); + + imp->blt = arm_neon_blt; + imp->fill = arm_neon_fill; +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.19.6/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch b/recipes/xorg-lib/pixman-0.19.6/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch new file mode 100644 index 0000000000..6efdb621ad --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.6/0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch @@ -0,0 +1,172 @@ +From e1191ad6563a1fb02a45982b1c4d7fed3c655e97 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka <siarhei.siamashka@nokia.com> +Date: Mon, 4 Oct 2010 01:56:59 +0300 +Subject: [PATCH 8/8] ARM optimization for scaled src_0565_0565 operation with nearest filter + +The code actually uses only armv4t instructions. + +Benchmark from ARM11: + + == before == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=34.86 MPix/s + + == after == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=36.62 MPix/s + +Benchmark from ARM Cortex-A8: + + == before == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s + + == after == + op=1, src_fmt=10020565, dst_fmt=10020565, speed=94.91 MPix/s +--- + pixman/pixman-arm-simd-asm.S | 66 ++++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-arm-simd.c | 37 +++++++++++++++++++++++ + 2 files changed, 103 insertions(+), 0 deletions(-) + +diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S +index a3d2d40..b6f69db 100644 +--- a/pixman/pixman-arm-simd-asm.S ++++ b/pixman/pixman-arm-simd-asm.S +@@ -1,5 +1,6 @@ + /* + * Copyright © 2008 Mozilla Corporation ++ * Copyright © 2010 Nokia Corporation + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that +@@ -328,3 +329,68 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6 + pop {r4, r5, r6, r7, r8, r9, r10, r11} + bx lr + .endfunc ++ ++/* ++ * Note: This function is actually primarily optimized for ARM Cortex-A8 ++ * pipeline. In order to get good performance on ARM9/ARM11 cores (which ++ * don't have efficient write combining), it needs to be changed to use ++ * 16-byte aligned writes using STM instruction. ++ */ ++pixman_asm_function pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6 ++ DST .req r0 ++ SRC .req r1 ++ W .req r2 ++ VX .req r3 ++ UNIT_X .req r12 ++ TMP1 .req r4 ++ TMP2 .req r5 ++ MASK .req r6 ++ ldr UNIT_X, [sp] ++ push {r4, r5, r6, r7} ++ mvn MASK, #1 ++ ++ /* define helper macro */ ++ .macro scale_2_pixels ++ ldrh TMP1, [SRC, TMP1] ++ and TMP2, MASK, VX, lsr #15 ++ add VX, VX, UNIT_X ++ strh TMP1, [DST], #2 ++ ++ ldrh TMP2, [SRC, TMP2] ++ and TMP1, MASK, VX, lsr #15 ++ add VX, VX, UNIT_X ++ strh TMP2, [DST], #2 ++ .endm ++ ++ /* now do the scaling */ ++ and TMP1, MASK, VX, lsr #15 ++ add VX, VX, UNIT_X ++ subs W, #4 ++ blt 2f ++1: /* main loop, process 4 pixels per iteration */ ++ scale_2_pixels ++ scale_2_pixels ++ subs W, W, #4 ++ bge 1b ++2: ++ tst W, #2 ++ beq 2f ++ scale_2_pixels ++2: ++ tst W, #1 ++ ldrneh TMP1, [SRC, TMP1] ++ strneh TMP1, [DST], #2 ++ /* cleanup helper macro */ ++ .purgem scale_2_pixels ++ .unreq DST ++ .unreq SRC ++ .unreq W ++ .unreq VX ++ .unreq UNIT_X ++ .unreq TMP1 ++ .unreq TMP2 ++ .unreq MASK ++ /* return */ ++ pop {r4, r5, r6, r7} ++ bx lr ++.endfunc +diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c +index d466a31..f6f464c 100644 +--- a/pixman/pixman-arm-simd.c ++++ b/pixman/pixman-arm-simd.c +@@ -29,6 +29,7 @@ + + #include "pixman-private.h" + #include "pixman-arm-common.h" ++#include "pixman-fast-path.h" + + #if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */ + +@@ -375,6 +376,35 @@ pixman_composite_over_n_8_8888_asm_armv6 (int32_t width, + + #endif + ++void ++pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6 (uint16_t * dst, ++ uint16_t * src, ++ int32_t w, ++ pixman_fixed_t vx, ++ pixman_fixed_t unit_x); ++ ++static force_inline void ++scaled_nearest_scanline_armv6_565_565_SRC (uint16_t * dst, ++ uint16_t * src, ++ int32_t w, ++ pixman_fixed_t vx, ++ pixman_fixed_t unit_x, ++ pixman_fixed_t max_vx) ++{ ++ pixman_scaled_nearest_scanline_565_565_SRC_asm_armv6 (dst, src, w, ++ vx, unit_x); ++} ++ ++FAST_NEAREST_MAINLOOP (armv6_565_565_cover_SRC, ++ scaled_nearest_scanline_armv6_565_565_SRC, ++ uint16_t, uint16_t, COVER); ++FAST_NEAREST_MAINLOOP (armv6_565_565_none_SRC, ++ scaled_nearest_scanline_armv6_565_565_SRC, ++ uint16_t, uint16_t, NONE); ++FAST_NEAREST_MAINLOOP (armv6_565_565_pad_SRC, ++ scaled_nearest_scanline_armv6_565_565_SRC, ++ uint16_t, uint16_t, PAD); ++ + PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8, + uint8_t, 1, uint8_t, 1) + PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888, +@@ -404,6 +434,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] = + PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888), + PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888), + ++ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, r5g6b5, r5g6b5, armv6_565_565), ++ SIMPLE_NEAREST_FAST_PATH_COVER (SRC, b5g6r5, b5g6r5, armv6_565_565), ++ SIMPLE_NEAREST_FAST_PATH_NONE (SRC, r5g6b5, r5g6b5, armv6_565_565), ++ SIMPLE_NEAREST_FAST_PATH_NONE (SRC, b5g6r5, b5g6r5, armv6_565_565), ++ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, r5g6b5, armv6_565_565), ++ SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, b5g6r5, armv6_565_565), ++ + { PIXMAN_OP_NONE }, + }; + +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman_0.19.4.bb b/recipes/xorg-lib/pixman_0.19.4.bb deleted file mode 100644 index b02a7a3c33..0000000000 --- a/recipes/xorg-lib/pixman_0.19.4.bb +++ /dev/null @@ -1,23 +0,0 @@ -require pixman.inc - -SRC_URI[archive.md5sum] = "100a2d23f1d5683fdaa5d7ca71a0182b" -SRC_URI[archive.sha256sum] = "04e613f87fec13e5d6e8540587af1112e9ab19f9d550751e848a2d65deb26fd6" - -PR = "${INC_PR}.1" - -SRC_URI += "\ - file://0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch \ - file://0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch \ - file://0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \ - file://0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \ - file://0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \ - file://0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \ - file://0007-ARM-added-neon_composite_add_0565_8_0565-fast-path.patch \ - file://0008-ARM-added-neon_composite_out_reverse_0565_8_0565-fas.patch \ - file://0009-ARM-added-neon_composite_out_reverse_8_0565-fast-pat.patch \ -" - -NEON = " --disable-arm-neon " -NEON_armv7a = " " - -EXTRA_OECONF = "${NEON} --disable-gtk" diff --git a/recipes/xorg-lib/pixman_0.19.6.bb b/recipes/xorg-lib/pixman_0.19.6.bb new file mode 100644 index 0000000000..984fde2842 --- /dev/null +++ b/recipes/xorg-lib/pixman_0.19.6.bb @@ -0,0 +1,22 @@ +require pixman.inc + +SRC_URI[archive.md5sum] = "3f31cf670880199979d71a3234308cc9" +SRC_URI[archive.sha256sum] = "1bc9f0b00de69e3aeab3525012506608ea3d913eb452d0134c729c1d7abab1b5" + +PR = "${INC_PR}.0" + +SRC_URI += "\ + file://0001-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \ + file://0002-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \ + file://0003-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \ + file://0004-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \ + file://0005-ARM-added-NEON-optimizations-for-fetch-store-r5g6b5-.patch \ + file://0006-ARM-added-NEON-optimizations-for-fetch-store-a8-scan.patch \ + file://0007-ARM-added-NEON-optimizations-for-fetching-x8r8g8b8-s.patch \ + file://0008-ARM-optimization-for-scaled-src_0565_0565-operation-.patch \ +" + +NEON = " --disable-arm-neon " +NEON_armv7a = " " + +EXTRA_OECONF = "${NEON} --disable-gtk" |