From 284286b126735d26cb426e159088551d9da96cd0 Mon Sep 17 00:00:00 2001 From: Koen Kooi Date: Wed, 22 Sep 2010 10:21:43 +0200 Subject: pixman 0.19.4: add latest release from the unstable series * forward port the overlapped blit patches * add NEON optimization for 16bpp * fix fast path flags for PAD --- ...ded-NEON-optimizations-for-fetch-store-r5.patch | 168 +++++++++++++++++++++ ...iminate-PAD-and-REFLECT-repeat-in-standar.patch | 59 ++++++++ ...mplementation-of-pixman_blt-with-overlapp.patch | 114 ++++++++++++++ ...of-overlapping-src-dst-for-pixman_blt_mmx.patch | 91 +++++++++++ ...f-overlapping-src-dst-for-pixman_blt_sse2.patch | 91 +++++++++++ ...f-overlapping-src-dst-for-pixman_blt_neon.patch | 94 ++++++++++++ recipes/xorg-lib/pixman_0.19.4.bb | 23 +++ 7 files changed, 640 insertions(+) create mode 100644 recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch create mode 100644 recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch create mode 100644 recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch create mode 100644 recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch create mode 100644 recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch create mode 100644 recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch create mode 100644 recipes/xorg-lib/pixman_0.19.4.bb (limited to 'recipes/xorg-lib') diff --git a/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch b/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch new file mode 100644 index 0000000000..756c418fce --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.4/0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch @@ -0,0 +1,168 @@ +From 38aabb3be87ea68e37f34256c778d07f62680ec6 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Thu, 10 Dec 2009 00:51:50 +0200 +Subject: [PATCH 1/6] ARM: HACK: added NEON optimizations for fetch/store r5g6b5 scanline + +--- + pixman/pixman-access.c | 23 ++++++++++++++++++++++- + pixman/pixman-arm-neon-asm.S | 20 ++++++++++++++++++++ + pixman/pixman-arm-neon.c | 40 ++++++++++++++++++++++++++++++++++++++++ + pixman/pixman-private.h | 5 +++++ + 4 files changed, 87 insertions(+), 1 deletions(-) + +diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c +index f1ce0ba..b33da29 100644 +--- a/pixman/pixman-access.c ++++ b/pixman/pixman-access.c +@@ -2836,7 +2836,7 @@ typedef struct + store_scanline_ ## format, store_scanline_generic_64 \ + } + +-static const format_info_t accessors[] = ++static format_info_t accessors[] = + { + /* 32 bpp formats */ + FORMAT_INFO (a8r8g8b8), +@@ -2978,6 +2978,27 @@ _pixman_bits_image_setup_accessors (bits_image_t *image) + setup_accessors (image); + } + ++void ++_pixman_bits_override_accessors (pixman_format_code_t format, ++ fetch_scanline_t fetch_func, ++ store_scanline_t store_func) ++{ ++ format_info_t *info = accessors; ++ ++ while (info->format != PIXMAN_null) ++ { ++ if (info->format == format) ++ { ++ if (fetch_func) ++ info->fetch_scanline_32 = fetch_func; ++ if (store_func) ++ info->store_scanline_32 = store_func; ++ return; ++ } ++ info++; ++ } ++} ++ + #else + + void +diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S +index 9f6568f..e1a697e 100644 +--- a/pixman/pixman-arm-neon-asm.S ++++ b/pixman/pixman-arm-neon-asm.S +@@ -458,6 +458,16 @@ generate_composite_function \ + pixman_composite_src_8888_0565_process_pixblock_tail, \ + pixman_composite_src_8888_0565_process_pixblock_tail_head + ++generate_composite_function_single_scanline \ ++ pixman_store_scanline_r5g6b5_asm_neon, 32, 0, 16, \ ++ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ default_init, \ ++ default_cleanup, \ ++ pixman_composite_src_8888_0565_process_pixblock_head, \ ++ pixman_composite_src_8888_0565_process_pixblock_tail, \ ++ pixman_composite_src_8888_0565_process_pixblock_tail_head ++ + /******************************************************************************/ + + .macro pixman_composite_src_0565_8888_process_pixblock_head +@@ -493,6 +503,16 @@ generate_composite_function \ + pixman_composite_src_0565_8888_process_pixblock_tail, \ + pixman_composite_src_0565_8888_process_pixblock_tail_head + ++generate_composite_function_single_scanline \ ++ pixman_fetch_scanline_r5g6b5_asm_neon, 16, 0, 32, \ ++ FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \ ++ 8, /* number of pixels, processed in a single block */ \ ++ default_init, \ ++ default_cleanup, \ ++ pixman_composite_src_0565_8888_process_pixblock_head, \ ++ pixman_composite_src_0565_8888_process_pixblock_tail, \ ++ pixman_composite_src_0565_8888_process_pixblock_tail_head ++ + /******************************************************************************/ + + .macro pixman_composite_add_8000_8000_process_pixblock_head +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index ece6054..e0d2001 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -344,6 +344,42 @@ BIND_COMBINE_U (over) + BIND_COMBINE_U (add) + BIND_COMBINE_U (out_reverse) + ++void ++pixman_fetch_scanline_r5g6b5_asm_neon (int width, ++ uint32_t *buffer, ++ const uint16_t *pixel); ++void ++pixman_store_scanline_r5g6b5_asm_neon (int width, ++ uint16_t *pixel, ++ const uint32_t *values); ++ ++static void ++neon_fetch_scanline_r5g6b5 (pixman_image_t *image, ++ int x, ++ int y, ++ int width, ++ uint32_t * buffer, ++ const uint32_t *mask) ++{ ++ const uint32_t *bits = image->bits.bits + y * image->bits.rowstride; ++ const uint16_t *pixel = (const uint16_t *)bits + x; ++ ++ pixman_fetch_scanline_r5g6b5_asm_neon (width, buffer, pixel); ++} ++ ++static void ++neon_store_scanline_r5g6b5 (bits_image_t * image, ++ int x, ++ int y, ++ int width, ++ const uint32_t *values) ++{ ++ uint32_t *bits = image->bits + image->rowstride * y; ++ uint16_t *pixel = ((uint16_t *) bits) + x; ++ ++ pixman_store_scanline_r5g6b5_asm_neon (width, pixel, values); ++} ++ + pixman_implementation_t * + _pixman_implementation_create_arm_neon (void) + { +@@ -355,6 +391,10 @@ _pixman_implementation_create_arm_neon (void) + imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u; + imp->combine_32[PIXMAN_OP_OUT_REVERSE] = neon_combine_out_reverse_u; + ++ _pixman_bits_override_accessors (PIXMAN_r5g6b5, ++ neon_fetch_scanline_r5g6b5, ++ neon_store_scanline_r5g6b5); ++ + imp->blt = arm_neon_blt; + imp->fill = arm_neon_fill; + +diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h +index d85868f..564f8f0 100644 +--- a/pixman/pixman-private.h ++++ b/pixman/pixman-private.h +@@ -206,6 +206,11 @@ void + _pixman_bits_image_setup_accessors (bits_image_t *image); + + void ++_pixman_bits_override_accessors (pixman_format_code_t format, ++ fetch_scanline_t fetch_func, ++ store_scanline_t store_func); ++ ++void + _pixman_image_get_scanline_generic_64 (pixman_image_t *image, + int x, + int y, +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch b/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch new file mode 100644 index 0000000000..324fb41393 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.4/0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch @@ -0,0 +1,59 @@ +From e3bfd272cf813b8419757a3b59128b3568e5f800 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Mon, 20 Sep 2010 19:07:33 +0300 +Subject: [PATCH 2/6] Don't discriminate PAD and REFLECT repeat in standard fast paths + +Without this fix, setting PAD repeat on a source image prevents +the use of any nonscaled standard fast paths, affecting performance +a lot. But as long as no pixels outside the source image boundaries +are touched by the compositing operation, all the repeat types +behave the same and can take the same fast paths. + +This is important because setting PAD repeat instead of NONE is +more hardware acceleration friendly (for the drivers implementing +RENDER extension) and does not inhibit OVER->SRC operator +optimization in pixman. +--- + pixman/pixman-image.c | 6 ++---- + pixman/pixman-private.h | 2 -- + 2 files changed, 2 insertions(+), 6 deletions(-) + +diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c +index 8397f6a..14a2219 100644 +--- a/pixman/pixman-image.c ++++ b/pixman/pixman-image.c +@@ -363,16 +363,14 @@ compute_image_info (pixman_image_t *image) + flags |= + FAST_PATH_NO_PAD_REPEAT | + FAST_PATH_NO_NONE_REPEAT | +- FAST_PATH_NO_NORMAL_REPEAT | +- FAST_PATH_COVERS_CLIP; ++ FAST_PATH_NO_NORMAL_REPEAT; + break; + + case PIXMAN_REPEAT_PAD: + flags |= + FAST_PATH_NO_REFLECT_REPEAT | + FAST_PATH_NO_NONE_REPEAT | +- FAST_PATH_NO_NORMAL_REPEAT | +- FAST_PATH_COVERS_CLIP; ++ FAST_PATH_NO_NORMAL_REPEAT; + break; + + default: +diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h +index 564f8f0..440ae7a 100644 +--- a/pixman/pixman-private.h ++++ b/pixman/pixman-private.h +@@ -602,8 +602,6 @@ _pixman_choose_implementation (void); + (FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_NO_ALPHA_MAP | \ + FAST_PATH_NO_CONVOLUTION_FILTER | \ +- FAST_PATH_NO_PAD_REPEAT | \ +- FAST_PATH_NO_REFLECT_REPEAT | \ + FAST_PATH_NO_ACCESSORS | \ + FAST_PATH_NARROW_FORMAT | \ + FAST_PATH_COVERS_CLIP) +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch new file mode 100644 index 0000000000..2264d0d3b6 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.4/0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch @@ -0,0 +1,114 @@ +From c5099dabb417cab343185d6e22ae4925e53a756f Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Tue, 16 Mar 2010 16:55:28 +0100 +Subject: [PATCH 3/6] Generic C implementation of pixman_blt with overlapping support + +Uses memcpy/memmove functions to copy pixels, can handle the +case when both source and destination areas are in the same +image (this is useful for scrolling). + +It is assumed that copying direction is only important when +using the same image for both source and destination (and +src_stride == dst_stride). Copying direction is undefined +for the images with different source and destination stride +which happen to be in the overlapped areas (but this is an +unrealistic case anyway). +--- + pixman/pixman-general.c | 21 ++++++++++++++++++--- + pixman/pixman-private.h | 43 +++++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 61 insertions(+), 3 deletions(-) + +diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c +index 4d234a0..c4d2c14 100644 +--- a/pixman/pixman-general.c ++++ b/pixman/pixman-general.c +@@ -280,9 +280,24 @@ general_blt (pixman_implementation_t *imp, + int width, + int height) + { +- /* We can't blit unless we have sse2 or mmx */ +- +- return FALSE; ++ uint8_t *dst_bytes = (uint8_t *)dst_bits; ++ uint8_t *src_bytes = (uint8_t *)src_bits; ++ int bpp; ++ ++ if (src_bpp != dst_bpp || src_bpp & 7) ++ return FALSE; ++ ++ bpp = src_bpp >> 3; ++ width *= bpp; ++ src_stride *= 4; ++ dst_stride *= 4; ++ pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp, ++ dst_bytes + dst_y * dst_stride + dst_x * bpp, ++ src_stride, ++ dst_stride, ++ width, ++ height); ++ return TRUE; + } + + static pixman_bool_t +diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h +index 440ae7a..aac2067 100644 +--- a/pixman/pixman-private.h ++++ b/pixman/pixman-private.h +@@ -10,6 +10,7 @@ + + #include "pixman.h" + #include ++#include + #include + #include + #include +@@ -883,4 +884,46 @@ void pixman_timer_register (pixman_timer_t *timer); + + #endif /* PIXMAN_TIMERS */ + ++/* a helper function, can blit 8-bit images with src/dst overlapping support */ ++static inline void ++pixman_blt_helper (uint8_t *src_bytes, ++ uint8_t *dst_bytes, ++ int src_stride, ++ int dst_stride, ++ int width, ++ int height) ++{ ++ /* ++ * The second part of this check is not strictly needed, but it prevents ++ * unnecessary upside-down processing of areas which belong to different ++ * images. Upside-down processing can be slower with fixed-distance-ahead ++ * prefetch and perceived as having more tearing. ++ */ ++ if (src_bytes < dst_bytes + width && ++ src_bytes + src_stride * height > dst_bytes) ++ { ++ src_bytes += src_stride * height - src_stride; ++ dst_bytes += dst_stride * height - dst_stride; ++ dst_stride = -dst_stride; ++ src_stride = -src_stride; ++ /* Horizontal scrolling to the left needs memmove */ ++ if (src_bytes + width > dst_bytes) ++ { ++ while (--height >= 0) ++ { ++ memmove (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++ return; ++ } ++ } ++ while (--height >= 0) ++ { ++ memcpy (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++} ++ + #endif /* PIXMAN_PRIVATE_H */ +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch new file mode 100644 index 0000000000..bbade7f8ba --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.4/0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch @@ -0,0 +1,91 @@ +From f8c3deb1f7a26992fe217d1748a1fa5c832bbbd2 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Thu, 22 Oct 2009 05:45:47 +0300 +Subject: [PATCH 4/6] Support of overlapping src/dst for pixman_blt_mmx + +--- + pixman/pixman-mmx.c | 55 +++++++++++++++++++++++++++++--------------------- + 1 files changed, 32 insertions(+), 23 deletions(-) + +diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c +index b284cd3..5b6afaa 100644 +--- a/pixman/pixman-mmx.c ++++ b/pixman/pixman-mmx.c +@@ -2996,34 +2996,43 @@ pixman_blt_mmx (uint32_t *src_bits, + { + uint8_t * src_bytes; + uint8_t * dst_bytes; +- int byte_width; ++ int bpp; + +- if (src_bpp != dst_bpp) ++ if (src_bpp != dst_bpp || src_bpp & 7) + return FALSE; + +- if (src_bpp == 16) +- { +- src_stride = src_stride * (int) sizeof (uint32_t) / 2; +- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; +- src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); +- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); +- byte_width = 2 * width; +- src_stride *= 2; +- dst_stride *= 2; +- } +- else if (src_bpp == 32) ++ bpp = src_bpp >> 3; ++ width *= bpp; ++ src_stride *= 4; ++ dst_stride *= 4; ++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; ++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; ++ ++ if (src_bpp != 16 && src_bpp != 32) + { +- src_stride = src_stride * (int) sizeof (uint32_t) / 4; +- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; +- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); +- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); +- byte_width = 4 * width; +- src_stride *= 4; +- dst_stride *= 4; ++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, ++ width, height); ++ return TRUE; + } +- else ++ ++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) + { +- return FALSE; ++ src_bytes += src_stride * height - src_stride; ++ dst_bytes += dst_stride * height - dst_stride; ++ dst_stride = -dst_stride; ++ src_stride = -src_stride; ++ ++ if (src_bytes + width > dst_bytes) ++ { ++ /* TODO: reverse scanline copy using MMX */ ++ while (--height >= 0) ++ { ++ memmove (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++ return TRUE; ++ } + } + + while (height--) +@@ -3033,7 +3042,7 @@ pixman_blt_mmx (uint32_t *src_bits, + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; +- w = byte_width; ++ w = width; + + while (w >= 2 && ((unsigned long)d & 3)) + { +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch new file mode 100644 index 0000000000..bf540e06c5 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.4/0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch @@ -0,0 +1,91 @@ +From 79fe7f347fe396aa2c917a1928fc18ab9321336c Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Thu, 22 Oct 2009 05:45:54 +0300 +Subject: [PATCH 5/6] Support of overlapping src/dst for pixman_blt_sse2 + +--- + pixman/pixman-sse2.c | 55 +++++++++++++++++++++++++++++-------------------- + 1 files changed, 32 insertions(+), 23 deletions(-) + +diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c +index 33d71ee..dcd6dcd 100644 +--- a/pixman/pixman-sse2.c ++++ b/pixman/pixman-sse2.c +@@ -5614,34 +5614,43 @@ pixman_blt_sse2 (uint32_t *src_bits, + { + uint8_t * src_bytes; + uint8_t * dst_bytes; +- int byte_width; ++ int bpp; + +- if (src_bpp != dst_bpp) ++ if (src_bpp != dst_bpp || src_bpp & 7) + return FALSE; + +- if (src_bpp == 16) +- { +- src_stride = src_stride * (int) sizeof (uint32_t) / 2; +- dst_stride = dst_stride * (int) sizeof (uint32_t) / 2; +- src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x)); +- dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); +- byte_width = 2 * width; +- src_stride *= 2; +- dst_stride *= 2; +- } +- else if (src_bpp == 32) ++ bpp = src_bpp >> 3; ++ width *= bpp; ++ src_stride *= 4; ++ dst_stride *= 4; ++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; ++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; ++ ++ if (src_bpp != 16 && src_bpp != 32) + { +- src_stride = src_stride * (int) sizeof (uint32_t) / 4; +- dst_stride = dst_stride * (int) sizeof (uint32_t) / 4; +- src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x)); +- dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x)); +- byte_width = 4 * width; +- src_stride *= 4; +- dst_stride *= 4; ++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, ++ width, height); ++ return TRUE; + } +- else ++ ++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) + { +- return FALSE; ++ src_bytes += src_stride * height - src_stride; ++ dst_bytes += dst_stride * height - dst_stride; ++ dst_stride = -dst_stride; ++ src_stride = -src_stride; ++ ++ if (src_bytes + width > dst_bytes) ++ { ++ /* TODO: reverse scanline copy using SSE2 */ ++ while (--height >= 0) ++ { ++ memmove (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++ return TRUE; ++ } + } + + cache_prefetch ((__m128i*)src_bytes); +@@ -5654,7 +5663,7 @@ pixman_blt_sse2 (uint32_t *src_bits, + uint8_t *d = dst_bytes; + src_bytes += src_stride; + dst_bytes += dst_stride; +- w = byte_width; ++ w = width; + + cache_prefetch_next ((__m128i*)s); + cache_prefetch_next ((__m128i*)d); +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch new file mode 100644 index 0000000000..cd25193670 --- /dev/null +++ b/recipes/xorg-lib/pixman-0.19.4/0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch @@ -0,0 +1,94 @@ +From ea0f7b1ae605bb57ca23e88b38b9c19390596723 Mon Sep 17 00:00:00 2001 +From: Siarhei Siamashka +Date: Wed, 18 Nov 2009 06:08:48 +0200 +Subject: [PATCH 6/6] Support of overlapping src/dst for pixman_blt_neon + +--- + pixman/pixman-arm-neon.c | 62 +++++++++++++++++++++++++++++++++++++-------- + 1 files changed, 51 insertions(+), 11 deletions(-) + +diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c +index e0d2001..db1c2df 100644 +--- a/pixman/pixman-arm-neon.c ++++ b/pixman/pixman-arm-neon.c +@@ -172,26 +172,66 @@ pixman_blt_neon (uint32_t *src_bits, + int width, + int height) + { +- if (src_bpp != dst_bpp) ++ uint8_t * src_bytes; ++ uint8_t * dst_bytes; ++ int bpp; ++ ++ if (src_bpp != dst_bpp || src_bpp & 7) + return FALSE; + ++ bpp = src_bpp >> 3; ++ width *= bpp; ++ src_stride *= 4; ++ dst_stride *= 4; ++ src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp; ++ dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp; ++ ++ if (src_bpp != 16 && src_bpp != 32) ++ { ++ pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride, ++ width, height); ++ return TRUE; ++ } ++ ++ if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes) ++ { ++ src_bytes += src_stride * height - src_stride; ++ dst_bytes += dst_stride * height - dst_stride; ++ dst_stride = -dst_stride; ++ src_stride = -src_stride; ++ ++ if (src_bytes + width > dst_bytes) ++ { ++ /* TODO: reverse scanline copy using NEON */ ++ while (--height >= 0) ++ { ++ memmove (dst_bytes, src_bytes, width); ++ dst_bytes += dst_stride; ++ src_bytes += src_stride; ++ } ++ return TRUE; ++ } ++ } ++ + switch (src_bpp) + { + case 16: + pixman_composite_src_0565_0565_asm_neon ( +- width, height, +- (uint16_t *)(((char *) dst_bits) + +- dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2, +- (uint16_t *)(((char *) src_bits) + +- src_y * src_stride * 4 + src_x * 2), src_stride * 2); ++ width >> 1, ++ height, ++ (uint16_t *) dst_bytes, ++ dst_stride >> 1, ++ (uint16_t *) src_bytes, ++ src_stride >> 1); + return TRUE; + case 32: + pixman_composite_src_8888_8888_asm_neon ( +- width, height, +- (uint32_t *)(((char *) dst_bits) + +- dst_y * dst_stride * 4 + dst_x * 4), dst_stride, +- (uint32_t *)(((char *) src_bits) + +- src_y * src_stride * 4 + src_x * 4), src_stride); ++ width >> 2, ++ height, ++ (uint32_t *) dst_bytes, ++ dst_stride >> 2, ++ (uint32_t *) src_bytes, ++ src_stride >> 2); + return TRUE; + default: + return FALSE; +-- +1.6.6.1 + diff --git a/recipes/xorg-lib/pixman_0.19.4.bb b/recipes/xorg-lib/pixman_0.19.4.bb new file mode 100644 index 0000000000..19fb1d4a4c --- /dev/null +++ b/recipes/xorg-lib/pixman_0.19.4.bb @@ -0,0 +1,23 @@ +require pixman.inc + +SRC_URI[archive.md5sum] = "100a2d23f1d5683fdaa5d7ca71a0182b" +SRC_URI[archive.sha256sum] = "04e613f87fec13e5d6e8540587af1112e9ab19f9d550751e848a2d65deb26fd6" + +PR = "${INC_PR}.0" + +SRC_URI += "\ + file://0001-ARM-HACK-added-NEON-optimizations-for-fetch-store-r5.patch \ + file://0002-Don-t-discriminate-PAD-and-REFLECT-repeat-in-standar.patch \ + file://0003-Generic-C-implementation-of-pixman_blt-with-overlapp.patch \ + file://0004-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch \ + file://0005-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch \ + file://0006-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch \ +" + +NEON = " --disable-arm-neon " +NEON_armv7a = " " + +EXTRA_OECONF = "${NEON} --disable-gtk" + +DEFAULT_PREFERENCE = "-1" +DEFAULT_PREFERENCE_angstrom = "2" -- cgit 1.2.3-korg