pixman: add 0.21.6 + fixes

Signed-off-by: Koen Kooi <koen@openembedded.org> Acked-by: Martin Jansa <Martin.Jansa@gmail.com>
author: Koen Kooi <koen@openembedded.org> 2011-04-05 13:00:12 +0200
committer: Koen Kooi <koen@openembedded.org> 2011-04-05 15:07:59 +0200
commit: c3265b14b23e1aec54f7794e753b28f0d0622d86 (patch)
tree: c84f6cda614e47d02328b19eccb99bb6c6b34aeb /recipes/xorg-lib/pixman-0.21.6
parent: 84f0436d63aef5fce34eb0c6d5b07a4e312b7049 (diff)
download: openembedded-c3265b14b23e1aec54f7794e753b28f0d0622d86.tar.gz
39 files changed, 5504 insertions, 0 deletions
diff --git a/recipes/xorg-lib/pixman-0.21.6/0002-Fix-compilation-on-Win32.patch b/recipes/xorg-lib/pixman-0.21.6/0002-Fix-compilation-on-Win32.patch
new file mode 100644
index 0000000000..16b6ff13f9
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0002-Fix-compilation-on-Win32.patch
@@ -0,0 +1,42 @@
+From 20ed723a5a42fb8636bc9a5f32974dec1b66a785 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Thu, 24 Feb 2011 10:44:04 +0100
+Subject: [PATCH 02/40] Fix compilation on Win32
+
+Makefile.win32 contained a typo and was missing the dependency on
+the built sources.
+---
+ pixman/Makefile.win32 |    6 ++++--
+ 1 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
+index 775fb5e..b5f9397 100644
+--- a/pixman/Makefile.win32
++++ b/pixman/Makefile.win32
+@@ -56,6 +56,8 @@ SOURCES =				\
+ 	pixman-general.c		\
+ 	$(NULL)
+ 
++BUILT_SOURCES = pixman-combine32.h pixman-combine32.c pixman-combine64.h pixman-combine64.c
++
+ # MMX compilation flags
+ ifeq ($(MMX_VAR),on)
+ CFLAGS += $(MMX_CFLAGS)
+@@ -122,7 +124,7 @@ endif
+ endif
+ 
+ # pixman compilation and linking
+-$(CFG_VAR)/%.obj: %.c
++$(CFG_VAR)/%.obj: %.c $(BUILT_SOURCES)
+ 	@mkdir -p $(CFG_VAR)
+ 	@$(CC) -c $(CFLAGS) -Fo"$@" $<
+ 
+@@ -141,4 +143,4 @@ pixman-combine64.h: pixman-combine.h.template make-combine.pl
+ 
+ clean_r:
+ 	@rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.lib $(CFG_VAR)/*.pdb $(CFG)/*.ilk || exit 0
+-	@rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk pixman-combine32.c pixman-combine64.c pixman-combine64.c pixman-combine64.h || exit 0
++	@rm -f $(CFG)/*.obj $(CFG)/*.lib $(CFG)/*.pdb $(CFG)/*.ilk $(BUILT_SOURCES) || exit 0
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0003-test-Fix-tests-for-compilation-on-Windows.patch b/recipes/xorg-lib/pixman-0.21.6/0003-test-Fix-tests-for-compilation-on-Windows.patch
new file mode 100644
index 0000000000..33351a991c
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0003-test-Fix-tests-for-compilation-on-Windows.patch
@@ -0,0 +1,232 @@
+From 11305b4ecdd36a17592c5c75de9157874853ab20 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 21:46:37 +0100
+Subject: [PATCH 03/40] test: Fix tests for compilation on Windows
+
+The Microsoft C compiler cannot handle subobject initialization and
+Win32 does not provide snprintf.
+
+Work around these limitations by using normal struct initialization
+and using sprintf (a manual check shows that the buffer size is
+sufficient).
+---
+ test/composite.c    |   29 +++++++++++++--------------
+ test/fetch-test.c   |   52 ++++++++++++++++++++++----------------------------
+ test/trap-crasher.c |   20 +++++++++---------
+ 3 files changed, 47 insertions(+), 54 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index e14f954..08c6689 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -617,18 +617,18 @@ eval_diff (color_t *expected, color_t *test, pixman_format_code_t format)
+ }
+ 
+ static char *
+-describe_image (image_t *info, char *buf, int buflen)
++describe_image (image_t *info, char *buf)
+ {
+     if (info->size)
+     {
+-	snprintf (buf, buflen, "%s %dx%d%s",
+-		  info->format->name,
+-		  info->size, info->size,
+-		  info->repeat ? "R" :"");
++	sprintf (buf, "%s %dx%d%s",
++		 info->format->name,
++		 info->size, info->size,
++		 info->repeat ? "R" :"");
+     }
+     else
+     {
+-	snprintf (buf, buflen, "solid");
++	sprintf (buf, "solid");
+     }
+ 
+     return buf;
+@@ -710,10 +710,9 @@ composite_test (image_t *dst,
+     {
+ 	char buf[40];
+ 
+-	snprintf (buf, sizeof (buf),
+-		  "%s %scomposite",
+-		  op->name,
+-		  component_alpha ? "CA " : "");
++	sprintf (buf, "%s %scomposite",
++		 op->name,
++		 component_alpha ? "CA " : "");
+ 
+ 	printf ("%s test error of %.4f --\n"
+ 		"           R    G    B    A\n"
+@@ -735,9 +734,9 @@ composite_test (image_t *dst,
+ 		    mask->color->b, mask->color->a,
+ 		    dst->color->r, dst->color->g,
+ 		    dst->color->b, dst->color->a);
+-	    printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
+-	    printf ("mask: %s, ", describe_image (mask, buf, sizeof (buf)));
+-	    printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
++	    printf ("src: %s, ", describe_image (src, buf));
++	    printf ("mask: %s, ", describe_image (mask, buf));
++	    printf ("dst: %s\n\n", describe_image (dst, buf));
+ 	}
+ 	else
+ 	{
+@@ -747,8 +746,8 @@ composite_test (image_t *dst,
+ 		    src->color->b, src->color->a,
+ 		    dst->color->r, dst->color->g,
+ 		    dst->color->b, dst->color->a);
+-	    printf ("src: %s, ", describe_image (src, buf, sizeof (buf)));
+-	    printf ("dst: %s\n\n", describe_image (dst, buf, sizeof (buf)));
++	    printf ("src: %s, ", describe_image (src, buf));
++	    printf ("dst: %s\n\n", describe_image (dst, buf));
+ 	}
+ 
+ 	success = FALSE;
+diff --git a/test/fetch-test.c b/test/fetch-test.c
+index 2ca16dd..314a072 100644
+--- a/test/fetch-test.c
++++ b/test/fetch-test.c
+@@ -8,7 +8,7 @@
+ 
+ static pixman_indexed_t mono_palette =
+ {
+-    .rgba = { 0x00000000, 0x00ffffff },
++    0, { 0x00000000, 0x00ffffff },
+ };
+ 
+ 
+@@ -24,57 +24,53 @@ typedef struct {
+ static testcase_t testcases[] =
+ {
+     {
+-	.format = PIXMAN_a8r8g8b8,
+-	.width = 2, .height = 2,
+-	.stride = 8,
+-	.src = { 0x00112233, 0x44556677,
+-	         0x8899aabb, 0xccddeeff },
+-	.dst = { 0x00112233, 0x44556677,
+-	         0x8899aabb, 0xccddeeff },
+-	.indexed = NULL,
++	PIXMAN_a8r8g8b8,
++	2, 2,
++	8,
++	{ 0x00112233, 0x44556677,
++	  0x8899aabb, 0xccddeeff },
++	{ 0x00112233, 0x44556677,
++	  0x8899aabb, 0xccddeeff },
++	NULL,
+     },
+     {
+-	.format = PIXMAN_g1,
+-	.width = 8, .height = 2,
+-	.stride = 4,
++	PIXMAN_g1,
++	8, 2,
++	4,
+ #ifdef WORDS_BIGENDIAN
+-	.src =
+ 	{
+ 	    0xaa000000,
+ 	    0x55000000
+ 	},
+ #else
+-	.src =
+ 	{
+ 	    0x00000055,
+ 	    0x000000aa
+ 	},
+ #endif
+-	.dst =
+ 	{
+ 	    0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000,
+ 	    0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff, 0x00000000, 0x00ffffff
+ 	},
+-	.indexed = &mono_palette,
++	&mono_palette,
+     },
+ #if 0
+     {
+-	.format = PIXMAN_g8,
+-	.width = 4, .height = 2,
+-	.stride = 4,
+-	.src = { 0x01234567,
+-	         0x89abcdef },
+-	.dst = { 0x00010101, 0x00232323, 0x00454545, 0x00676767,
+-	         0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
++	PIXMAN_g8,
++	4, 2,
++	4,
++	{ 0x01234567,
++	  0x89abcdef },
++	{ 0x00010101, 0x00232323, 0x00454545, 0x00676767,
++	  0x00898989, 0x00ababab, 0x00cdcdcd, 0x00efefef, },
+     },
+ #endif
+     /* FIXME: make this work on big endian */
+     {
+-	.format = PIXMAN_yv12,
+-	.width = 8, .height = 2,
+-	.stride = 8,
++	PIXMAN_yv12,
++	8, 2,
++	8,
+ #ifdef WORDS_BIGENDIAN
+-	.src =
+ 	{
+ 	    0x00ff00ff, 0x00ff00ff,
+ 	    0xff00ff00, 0xff00ff00,
+@@ -82,7 +78,6 @@ static testcase_t testcases[] =
+ 	    0x800080ff
+ 	},
+ #else
+-	.src =
+ 	{
+ 	    0xff00ff00, 0xff00ff00,
+ 	    0x00ff00ff, 0x00ff00ff,
+@@ -90,7 +85,6 @@ static testcase_t testcases[] =
+ 	    0xff800080
+ 	},
+ #endif
+-	.dst =
+ 	{
+ 	    0xff000000, 0xffffffff, 0xffb80000, 0xffffe113,
+ 	    0xff000000, 0xffffffff, 0xff0023ee, 0xff4affff,
+diff --git a/test/trap-crasher.c b/test/trap-crasher.c
+index 42b82f6..7485e62 100644
+--- a/test/trap-crasher.c
++++ b/test/trap-crasher.c
+@@ -7,21 +7,21 @@ main()
+     pixman_image_t *dst;
+     pixman_trapezoid_t traps[1] = {
+ 	{
+-	    .top = 2147483646,
+-	    .bottom = 2147483647,
+-	    .left = {
+-		.p1 = { .x = 0, .y = 0 },
+-		.p2 = { .x = 0, .y = 2147483647 }
++	    2147483646,
++	    2147483647,
++	    {
++		{ 0, 0 },
++		{ 0, 2147483647 }
+ 	    },
+-	    .right = {
+-		.p1 = { .x = 65536, .y = 0 },
+-		.p2 = { .x = 0, .y = 2147483647 }
++	    {
++		{ 65536, 0 },
++		{ 0, 2147483647 }
+ 	    }
+ 	},
+     };
+-    
++
+     dst = pixman_image_create_bits (PIXMAN_a8, 1, 1, NULL, -1);
+-    
++
+     pixman_add_trapezoids (dst, 0, 0, sizeof (traps)/sizeof (traps[0]), traps);
+     return (0);
+ }
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0004-test-Add-Makefile-for-Win32.patch b/recipes/xorg-lib/pixman-0.21.6/0004-test-Add-Makefile-for-Win32.patch
new file mode 100644
index 0000000000..94ed0b4308
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0004-test-Add-Makefile-for-Win32.patch
@@ -0,0 +1,92 @@
+From 72f5e5f608506c18c484bc5bc3e58bd83aeb7691 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 22:04:49 +0100
+Subject: [PATCH 04/40] test: Add Makefile for Win32
+
+---
+ test/Makefile.win32 |   73 +++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 73 insertions(+), 0 deletions(-)
+ create mode 100644 test/Makefile.win32
+
+diff --git a/test/Makefile.win32 b/test/Makefile.win32
+new file mode 100644
+index 0000000..c71afe1
+--- /dev/null
++++ b/test/Makefile.win32
+@@ -0,0 +1,73 @@
++CC   = cl
++LINK = link
++
++CFG_VAR = $(CFG)
++ifeq ($(CFG_VAR),)
++CFG_VAR=release
++endif
++
++CFLAGS     = -MD -nologo -D_CRT_SECURE_NO_DEPRECATE -D_CRT_NONSTDC_NO_DEPRECATE -D_BIND_TO_CURRENT_VCLIBS_VERSION -D_MT -I../pixman -I. -I../
++TEST_LDADD = ../pixman/$(CFG_VAR)/pixman-1.lib
++INCLUDES = -I../pixman -I$(top_builddir)/pixman
++
++# optimization flags
++ifeq ($(CFG_VAR),debug)
++CFLAGS += -Od -Zi
++else
++CFLAGS += -O2
++endif
++
++SOURCES =			\
++	a1-trap-test.c		\
++	pdf-op-test.c		\
++	region-test.c		\
++	region-translate-test.c	\
++	fetch-test.c		\
++	oob-test.c		\
++	trap-crasher.c		\
++	alpha-loop.c		\
++	scaling-crash-test.c	\
++	gradient-crash-test.c	\
++	alphamap.c		\
++	stress-test.c		\
++	composite-traps-test.c	\
++	blitters-test.c		\
++	scaling-test.c		\
++	affine-test.c		\
++	composite.c		\
++	utils.c
++
++TESTS =						\
++	$(CFG_VAR)/a1-trap-test.exe		\
++	$(CFG_VAR)/pdf-op-test.exe		\
++	$(CFG_VAR)/region-test.exe		\
++	$(CFG_VAR)/region-translate-test.exe	\
++	$(CFG_VAR)/fetch-test.exe		\
++	$(CFG_VAR)/oob-test.exe			\
++	$(CFG_VAR)/trap-crasher.exe		\
++	$(CFG_VAR)/alpha-loop.exe		\
++	$(CFG_VAR)/scaling-crash-test.exe	\
++	$(CFG_VAR)/gradient-crash-test.exe	\
++	$(CFG_VAR)/alphamap.exe			\
++	$(CFG_VAR)/stress-test.exe		\
++	$(CFG_VAR)/composite-traps-test.exe	\
++	$(CFG_VAR)/blitters-test.exe		\
++	$(CFG_VAR)/scaling-test.exe		\
++	$(CFG_VAR)/affine-test.exe		\
++	$(CFG_VAR)/composite.exe
++
++
++OBJECTS     = $(patsubst %.c, $(CFG_VAR)/%.obj, $(SOURCES))
++
++$(CFG_VAR)/%.obj: %.c
++	@mkdir -p $(CFG_VAR)
++	@$(CC) -c $(CFLAGS) -Fo"$@" $<
++
++$(CFG_VAR)/%.exe: $(CFG_VAR)/%.obj
++	$(LINK) /NOLOGO /OUT:$@ $< $(CFG_VAR)/utils.obj $(TEST_LDADD)
++
++all: $(OBJECTS) $(TESTS)
++	@exit 0
++
++clean:
++	@rm -f $(CFG_VAR)/*.obj $(CFG_VAR)/*.pdb || exit 0
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0005-Do-not-include-unused-headers.patch b/recipes/xorg-lib/pixman-0.21.6/0005-Do-not-include-unused-headers.patch
new file mode 100644
index 0000000000..60f9528aa4
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0005-Do-not-include-unused-headers.patch
@@ -0,0 +1,40 @@
+From 8868778ea1fdc8e70da76b3b00ea78106c5840d8 Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Tue, 22 Feb 2011 22:43:48 +0100
+Subject: [PATCH 05/40] Do not include unused headers
+
+pixman-combine32.h is included without being used both in
+pixman-image.c and in pixman-general.c.
+---
+ pixman/pixman-general.c |    2 --
+ pixman/pixman-image.c   |    1 -
+ 2 files changed, 0 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 16ea3a4..872fb7e 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -36,8 +36,6 @@
+ #include <stdlib.h>
+ #include <string.h>
+ #include "pixman-private.h"
+-#include "pixman-combine32.h"
+-#include "pixman-private.h"
+ 
+ static void
+ general_src_iter_init (pixman_implementation_t *imp,
+diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
+index 9103ca6..84bacf8 100644
+--- a/pixman/pixman-image.c
++++ b/pixman/pixman-image.c
+@@ -30,7 +30,6 @@
+ #include <assert.h>
+ 
+ #include "pixman-private.h"
+-#include "pixman-combine32.h"
+ 
+ pixman_bool_t
+ _pixman_init_gradient (gradient_t *                  gradient,
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0006-test-Silence-MSVC-warnings.patch b/recipes/xorg-lib/pixman-0.21.6/0006-test-Silence-MSVC-warnings.patch
new file mode 100644
index 0000000000..80d7943977
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0006-test-Silence-MSVC-warnings.patch
@@ -0,0 +1,63 @@
+From 9ebde285fa990bfa1524f166fbfb1368c346b14a Mon Sep 17 00:00:00 2001
+From: Andrea Canciani <ranma42@gmail.com>
+Date: Thu, 24 Feb 2011 12:53:39 +0100
+Subject: [PATCH 06/40] test: Silence MSVC warnings
+
+MSVC does not notice non-returning functions (abort() / assert(0))
+and warns about paths which end with them in non-void functions:
+
+c:\cygwin\home\ranma42\code\fdo\pixman\test\fetch-test.c(114) :
+warning C4715: 'reader' : not all control paths return a value
+c:\cygwin\home\ranma42\code\fdo\pixman\test\stress-test.c(133) :
+warning C4715: 'real_reader' : not all control paths return a value
+c:\cygwin\home\ranma42\code\fdo\pixman\test\composite.c(431) :
+warning C4715: 'calc_op' : not all control paths return a value
+
+These warnings can be silenced by adding a return after the
+termination call.
+---
+ test/composite.c   |    1 +
+ test/fetch-test.c  |    1 +
+ test/stress-test.c |    2 +-
+ 3 files changed, 3 insertions(+), 1 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index 08c6689..a86e5ed 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -426,6 +426,7 @@ calc_op (pixman_op_t op, double src, double dst, double srca, double dsta)
+     case PIXMAN_OP_HSL_LUMINOSITY:
+     default:
+ 	abort();
++	return 0; /* silence MSVC */
+     }
+ #undef mult_chan
+ }
+diff --git a/test/fetch-test.c b/test/fetch-test.c
+index 314a072..60bc765 100644
+--- a/test/fetch-test.c
++++ b/test/fetch-test.c
+@@ -110,6 +110,7 @@ reader (const void *src, int size)
+ 	return *(uint32_t *)src;
+     default:
+ 	assert(0);
++	return 0; /* silence MSVC */
+     }
+ }
+ 
+diff --git a/test/stress-test.c b/test/stress-test.c
+index bcbc1f8..166dc6d 100644
+--- a/test/stress-test.c
++++ b/test/stress-test.c
+@@ -128,7 +128,7 @@ real_reader (const void *src, int size)
+ 	return *(uint32_t *)src;
+     default:
+ 	assert (0);
+-	break;
++	return 0; /* silence MSVC */
+     }
+ }
+ 
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch b/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
new file mode 100644
index 0000000000..c5dab5c31f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0007-Main-loop-template-for-fast-single-pass-bilinear-sca.patch
@@ -0,0 +1,466 @@
+From d506bf68fd0e9a1c5dd484daee70631699918387 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 01:29:02 +0200
+Subject: [PATCH 07/40] Main loop template for fast single pass bilinear scaling
+
+Can be used for implementing SIMD optimized fast path
+functions which work with bilinear scaled source images.
+
+Similar to the template for nearest scaling main loop, the
+following types of mask are supported:
+1. no mask
+2. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
+3. solid mask
+
+PAD repeat is fully supported. NONE repeat is partially
+supported (right now only works if source image has alpha
+channel or when alpha channel of the source image does not
+have any effect on the compositing operation).
+---
+ pixman/pixman-fast-path.h |  432 +++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 432 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
+index d081222..1885d47 100644
+--- a/pixman/pixman-fast-path.h
++++ b/pixman/pixman-fast-path.h
+@@ -587,4 +587,436 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
+     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
+     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+ 
++/*****************************************************************************/
++
++/*
++ * Identify 5 zones in each scanline for bilinear scaling. Depending on
++ * whether 2 pixels to be interpolated are fetched from the image itself,
++ * from the padding area around it or from both image and padding area.
++ */
++static force_inline void
++bilinear_pad_repeat_get_scanline_bounds (int32_t         source_image_width,
++					 pixman_fixed_t  vx,
++					 pixman_fixed_t  unit_x,
++					 int32_t *       left_pad,
++					 int32_t *       left_tz,
++					 int32_t *       width,
++					 int32_t *       right_tz,
++					 int32_t *       right_pad)
++{
++	int width1 = *width, left_pad1, right_pad1;
++	int width2 = *width, left_pad2, right_pad2;
++
++	pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x,
++					&width1, &left_pad1, &right_pad1);
++	pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1,
++					unit_x, &width2, &left_pad2, &right_pad2);
++
++	*left_pad = left_pad2;
++	*left_tz = left_pad1 - left_pad2;
++	*right_tz = right_pad2 - right_pad1;
++	*right_pad = right_pad1;
++	*width -= *left_pad + *left_tz + *right_tz + *right_pad;
++}
++
++/*
++ * Main loop template for single pass bilinear scaling. It needs to be
++ * provided with 'scanline_func' which should do the compositing operation.
++ * The needed function has the following prototype:
++ *
++ *	scanline_func (dst_type_t *       dst,
++ *		       const mask_type_t * mask,
++ *		       const src_type_t * src_top,
++ *		       const src_type_t * src_bottom,
++ *		       int32_t            width,
++ *		       int                weight_top,
++ *		       int                weight_bottom,
++ *		       pixman_fixed_t     vx,
++ *		       pixman_fixed_t     unit_x,
++ *		       pixman_fixed_t     max_vx,
++ *		       pixman_bool_t      zero_src)
++ *
++ * Where:
++ *  dst                 - destination scanline buffer for storing results
++ *  mask                - mask buffer (or single value for solid mask)
++ *  src_top, src_bottom - two source scanlines
++ *  width               - number of pixels to process
++ *  weight_top          - weight of the top row for interpolation
++ *  weight_bottom       - weight of the bottom row for interpolation
++ *  vx                  - initial position for fetching the first pair of
++ *                        pixels from the source buffer
++ *  unit_x              - position increment needed to move to the next pair
++ *                        of pixels
++ *  max_vx              - image size as a fixed point value, can be used for
++ *                        implementing NORMAL repeat (when it is supported)
++ *  zero_src            - boolean hint variable, which is set to TRUE when
++ *                        all source pixels are fetched from zero padding
++ *                        zone for NONE repeat
++ *
++ * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 256,
++ *       but sometimes it may be less than that for NONE repeat when handling
++ *       fuzzy antialiased top or bottom image edges. Also both top and
++ *       bottom weight variables are guaranteed to have value in 0-255
++ *       range and can fit into unsigned byte or be used with 8-bit SIMD
++ *       multiplication instructions.
++ */
++#define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
++				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
++static void											\
++fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp,		\
++						   pixman_op_t              op,			\
++						   pixman_image_t *         src_image,		\
++						   pixman_image_t *         mask_image,		\
++						   pixman_image_t *         dst_image,		\
++						   int32_t                  src_x,		\
++						   int32_t                  src_y,		\
++						   int32_t                  mask_x,		\
++						   int32_t                  mask_y,		\
++						   int32_t                  dst_x,		\
++						   int32_t                  dst_y,		\
++						   int32_t                  width,		\
++						   int32_t                  height)		\
++{												\
++    dst_type_t *dst_line;									\
++    mask_type_t *mask_line;									\
++    src_type_t *src_first_line;									\
++    int       y1, y2;										\
++    pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
++    pixman_vector_t v;										\
++    pixman_fixed_t vx, vy;									\
++    pixman_fixed_t unit_x, unit_y;								\
++    int32_t left_pad, left_tz, right_tz, right_pad;						\
++												\
++    dst_type_t *dst;										\
++    mask_type_t solid_mask;									\
++    const mask_type_t *mask = &solid_mask;							\
++    int src_stride, mask_stride, dst_stride;							\
++												\
++    PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);	\
++    if (have_mask)										\
++    {												\
++	if (mask_is_solid)									\
++	{											\
++	    solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format);	\
++	    mask_stride = 0;									\
++	}											\
++	else											\
++	{											\
++	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,			\
++				   mask_stride, mask_line, 1);					\
++	}											\
++    }												\
++    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
++     * transformed from destination space to source space */					\
++    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
++												\
++    /* reference point is the center of the pixel */						\
++    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;				\
++    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;				\
++    v.vector[2] = pixman_fixed_1;								\
++												\
++    if (!pixman_transform_point_3d (src_image->common.transform, &v))				\
++	return;											\
++												\
++    unit_x = src_image->common.transform->matrix[0][0];						\
++    unit_y = src_image->common.transform->matrix[1][1];						\
++												\
++    v.vector[0] -= pixman_fixed_1 / 2;								\
++    v.vector[1] -= pixman_fixed_1 / 2;								\
++												\
++    vy = v.vector[1];										\
++												\
++    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD ||					\
++	PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)					\
++    {												\
++	bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x,	\
++					&left_pad, &left_tz, &width, &right_tz, &right_pad);	\
++	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
++	{											\
++	    /* PAD repeat does not need special handling for 'transition zones' and */		\
++	    /* they can be combined with 'padding zones' safely */				\
++	    left_pad += left_tz;								\
++	    right_pad += right_tz;								\
++	    left_tz = right_tz = 0;								\
++	}											\
++	v.vector[0] += left_pad * unit_x;							\
++    }												\
++												\
++    while (--height >= 0)									\
++    {												\
++	int weight1, weight2;									\
++	dst = dst_line;										\
++	dst_line += dst_stride;									\
++	vx = v.vector[0];									\
++	if (have_mask && !mask_is_solid)							\
++	{											\
++	    mask = mask_line;									\
++	    mask_line += mask_stride;								\
++	}											\
++												\
++	y1 = pixman_fixed_to_int (vy);								\
++	weight2 = (vy >> 8) & 0xff;								\
++	if (weight2)										\
++	{											\
++	    /* normal case, both row weights are in 0-255 range and fit unsigned byte */	\
++	    y2 = y1 + 1;									\
++	    weight1 = 256 - weight2;								\
++	}											\
++	else											\
++	{											\
++	    /* set both top and bottom row to the same scanline, and weights to 128+128 */	\
++	    y2 = y1;										\
++	    weight1 = weight2 = 128;								\
++	}											\
++	vy += unit_y;										\
++	if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD)					\
++	{											\
++	    src_type_t *src1, *src2;								\
++	    src_type_t buf1[2];									\
++	    src_type_t buf2[2];									\
++	    repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height);				\
++	    repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height);				\
++	    src1 = src_first_line + src_stride * y1;						\
++	    src2 = src_first_line + src_stride * y2;						\
++												\
++	    if (left_pad > 0)									\
++	    {											\
++		buf1[0] = buf1[1] = src1[0];							\
++		buf2[0] = buf2[1] = src2[0];							\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE);		\
++		dst += left_pad;								\
++		if (have_mask && !mask_is_solid)						\
++		    mask += left_pad;								\
++	    }											\
++	    if (width > 0)									\
++	    {											\
++		scanline_func (dst, mask,							\
++			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
++		dst += width;									\
++		if (have_mask && !mask_is_solid)						\
++		    mask += width;								\
++	    }											\
++	    if (right_pad > 0)									\
++	    {											\
++		buf1[0] = buf1[1] = src1[src_image->bits.width - 1];				\
++		buf2[0] = buf2[1] = src2[src_image->bits.width - 1];				\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE);	\
++	    }											\
++	}											\
++	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
++	{											\
++	    src_type_t *src1, *src2;								\
++	    src_type_t buf1[2];									\
++	    src_type_t buf2[2];									\
++	    /* handle top/bottom zero padding by just setting weights to 0 if needed */		\
++	    if (y1 < 0)										\
++	    {											\
++		weight1 = 0;									\
++		y1 = 0;										\
++	    }											\
++	    if (y1 >= src_image->bits.height)							\
++	    {											\
++		weight1 = 0;									\
++		y1 = src_image->bits.height - 1;						\
++	    }											\
++	    if (y2 < 0)										\
++	    {											\
++		weight2 = 0;									\
++		y2 = 0;										\
++	    }											\
++	    if (y2 >= src_image->bits.height)							\
++	    {											\
++		weight2 = 0;									\
++		y2 = src_image->bits.height - 1;						\
++	    }											\
++	    src1 = src_first_line + src_stride * y1;						\
++	    src2 = src_first_line + src_stride * y2;						\
++												\
++	    if (left_pad > 0)									\
++	    {											\
++		buf1[0] = buf1[1] = 0;								\
++		buf2[0] = buf2[1] = 0;								\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE);		\
++		dst += left_pad;								\
++		if (have_mask && !mask_is_solid)						\
++		    mask += left_pad;								\
++	    }											\
++	    if (left_tz > 0)									\
++	    {											\
++		buf1[0] = 0;									\
++		buf1[1] = src1[0];								\
++		buf2[0] = 0;									\
++		buf2[1] = src2[0];								\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, left_tz, weight1, weight2,				\
++			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
++		dst += left_tz;									\
++		if (have_mask && !mask_is_solid)						\
++		    mask += left_tz;								\
++		vx += left_tz * unit_x;								\
++	    }											\
++	    if (width > 0)									\
++	    {											\
++		scanline_func (dst, mask,							\
++			       src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE);	\
++		dst += width;									\
++		if (have_mask && !mask_is_solid)						\
++		    mask += width;								\
++		vx += width * unit_x;								\
++	    }											\
++	    if (right_tz > 0)									\
++	    {											\
++		buf1[0] = src1[src_image->bits.width - 1];					\
++		buf1[1] = 0;									\
++		buf2[0] = src2[src_image->bits.width - 1];					\
++		buf2[1] = 0;									\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, right_tz, weight1, weight2,				\
++			       pixman_fixed_frac (vx), unit_x, 0, FALSE);			\
++		dst += right_tz;								\
++		if (have_mask && !mask_is_solid)						\
++		    mask += right_tz;								\
++	    }											\
++	    if (right_pad > 0)									\
++	    {											\
++		buf1[0] = buf1[1] = 0;								\
++		buf2[0] = buf2[1] = 0;								\
++		scanline_func (dst, mask,							\
++			       buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE);		\
++	    }											\
++	}											\
++	else											\
++	{											\
++	    scanline_func (dst, mask, src_first_line + src_stride * y1,				\
++			   src_first_line + src_stride * y2, width,				\
++			   weight1, weight2, vx, unit_x, max_vx, FALSE);			\
++	}											\
++    }												\
++}
++
++/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
++#define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
++				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
++	FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\
++				  dst_type_t, repeat_mode, have_mask, mask_is_solid)
++
++#define SCALED_BILINEAR_FLAGS						\
++    (FAST_PATH_SCALE_TRANSFORM	|					\
++     FAST_PATH_NO_ALPHA_MAP	|					\
++     FAST_PATH_BILINEAR_FILTER	|					\
++     FAST_PATH_NO_ACCESSORS	|					\
++     FAST_PATH_NARROW_FORMAT)
++
++#define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func)			\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_PAD_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_null, 0,							\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func)			\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_NONE_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_null, 0,							\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func)			\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
++	PIXMAN_null, 0,							\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_PAD_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_NONE_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
++	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_PAD_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	(SCALED_BILINEAR_FLAGS		|				\
++	 FAST_PATH_NONE_REPEAT		|				\
++	 FAST_PATH_X_UNIT_POSITIVE),					\
++	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op,	\
++    }
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
++    {   PIXMAN_OP_ ## op,						\
++	PIXMAN_ ## s,							\
++	SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
++	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
++	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
++	fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op,	\
++    }
++
++/* Prefer the use of 'cover' variant, because it is faster */
++#define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func)				\
++    SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func),			\
++    SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func),			\
++    SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func)
++
++#define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func)			\
++    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
++    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
++    SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func)
++
++#define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func)		\
++    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
++    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
++    SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
++
+ #endif
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch b/recipes/xorg-lib/pixman-0.21.6/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
new file mode 100644
index 0000000000..18dfcaa10f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0008-test-check-correctness-of-bilinear_pad_repeat_get_sc.patch
@@ -0,0 +1,136 @@
+From 0df43b8ae5031dd83775d00b57b6bed809db0e89 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 02:07:09 +0200
+Subject: [PATCH 08/40] test: check correctness of 'bilinear_pad_repeat_get_scanline_bounds'
+
+Standalone correctness check for the new supplementary function used by
+bilinear scaling. This test program covers a somewhat wider range of
+input arguments than the other tests do.
+---
+ test/Makefile.am            |    2 +
+ test/scaling-helpers-test.c |   93 +++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 95 insertions(+), 0 deletions(-)
+ create mode 100644 test/scaling-helpers-test.c
+
+diff --git a/test/Makefile.am b/test/Makefile.am
+index 057e9ce..9dc7219 100644
+--- a/test/Makefile.am
++++ b/test/Makefile.am
+@@ -13,6 +13,7 @@ TESTPROGRAMS =			\
+ 	trap-crasher		\
+ 	alpha-loop		\
+ 	scaling-crash-test	\
++	scaling-helpers-test	\
+ 	gradient-crash-test	\
+ 	alphamap		\
+ 	stress-test		\
+@@ -33,6 +34,7 @@ alpha_loop_SOURCES = alpha-loop.c utils.c utils.h
+ composite_SOURCES = composite.c utils.c utils.h
+ gradient_crash_test_SOURCES = gradient-crash-test.c utils.c utils.h
+ stress_test_SOURCES = stress-test.c utils.c utils.h
++scaling_helpers_test_SOURCES = scaling-helpers-test.c utils.c utils.h
+ 
+ # Benchmarks
+ 
+diff --git a/test/scaling-helpers-test.c b/test/scaling-helpers-test.c
+new file mode 100644
+index 0000000..c186138
+--- /dev/null
++++ b/test/scaling-helpers-test.c
+@@ -0,0 +1,93 @@
++#include <config.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <stdio.h>
++#include <assert.h>
++#include "utils.h"
++#include "pixman-fast-path.h"
++
++/* Reference for 'bilinear_pad_repeat_get_scanline_bounds': classifies
++ * each of the *width pixels one by one and counts the five spans.
++ */
++static void
++bilinear_pad_repeat_get_scanline_bounds_ref (int32_t        source_image_width,
++					     pixman_fixed_t vx_,
++					     pixman_fixed_t unit_x,
++					     int32_t *      left_pad,
++					     int32_t *      left_tz,
++					     int32_t *      width,
++					     int32_t *      right_tz,
++					     int32_t *      right_pad)
++{
++    int w = *width;	/* pixel count is passed in through *width */
++    int64_t vx = vx_;	/* declared before any statement, as C89 requires */
++    *left_pad = 0;
++    *left_tz = 0;
++    *width = 0;
++    *right_tz = 0;
++    *right_pad = 0;
++    while (--w >= 0)
++    {
++	if (vx < 0)
++	{
++	    if (vx + pixman_fixed_1 < 0)
++		*left_pad += 1;		/* vx and vx + pixman_fixed_1 both < 0 */
++	    else
++		*left_tz += 1;		/* only vx itself is < 0 */
++	}
++	else if (vx + pixman_fixed_1 >= pixman_int_to_fixed (source_image_width))
++	{
++	    if (vx >= pixman_int_to_fixed (source_image_width))
++		*right_pad += 1;	/* vx itself is at or past the width */
++	    else
++		*right_tz += 1;		/* only vx + pixman_fixed_1 reaches it */
++	}
++	else
++	{
++	    *width += 1;		/* neither boundary is touched */
++	}
++	vx += unit_x;
++    }
++}
++
++int
++main (void)
++{
++    int i;
++    for (i = 0; i < 10000; i++)
++    {
++	int32_t left_pad1, left_tz1, width1, right_tz1, right_pad1;
++	int32_t left_pad2, left_tz2, width2, right_tz2, right_pad2;
++	pixman_fixed_t vx = lcg_rand_N(10000 << 16) - (3000 << 16);
++	int32_t width = lcg_rand_N(10000);
++	int32_t source_image_width = lcg_rand_N(10000) + 1;
++	pixman_fixed_t unit_x = lcg_rand_N(10 << 16) + 1;
++	width1 = width2 = width;
++	/* Reference result (width1: pixel count in, middle-span count out). */
++	bilinear_pad_repeat_get_scanline_bounds_ref (source_image_width,
++						     vx,
++						     unit_x,
++						     &left_pad1,
++						     &left_tz1,
++						     &width1,
++						     &right_tz1,
++						     &right_pad1);
++	/* Function under test, called with identical inputs. */
++	bilinear_pad_repeat_get_scanline_bounds (source_image_width,
++						 vx,
++						 unit_x,
++						 &left_pad2,
++						 &left_tz2,
++						 &width2,
++						 &right_tz2,
++						 &right_pad2);
++	/* All five outputs must agree (checks vanish if NDEBUG is defined). */
++	assert (left_pad1 == left_pad2);
++	assert (left_tz1 == left_tz2);
++	assert (width1 == width2);
++	assert (right_tz1 == right_tz2);
++	assert (right_pad1 == right_pad2);
++    }
++
++    return 0;
++}
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch b/recipes/xorg-lib/pixman-0.21.6/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
new file mode 100644
index 0000000000..b85f78169c
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0009-SSE2-optimization-for-bilinear-scaled-src_8888_8888.patch
@@ -0,0 +1,156 @@
+From 350029396d911941591149cc82b5e68a78ad6747 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Feb 2011 20:18:02 +0200
+Subject: [PATCH 09/40] SSE2 optimization for bilinear scaled 'src_8888_8888'
+
+A simple, naive implementation of bilinear scaling using SSE2 intrinsics,
+which only handles one pixel at a time. It is approximately 2x faster than
+the pixman general compositing path. Single-pass processing without an
+intermediate temporary buffer contributes ~15% and loop unrolling
+contributes ~20% of this speedup.
+
+Benchmark on Intel Core i7 (x86-64):
+ Using cairo-perf-trace:
+  before: image        firefox-planet-gnome   12.566   12.610   0.23%    6/6
+  after:  image        firefox-planet-gnome   10.961   11.013   0.19%    5/6
+
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=70.48 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=165.38 MPix/s
+---
+ pixman/pixman-sse2.c |  112 ++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 112 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 88287b4..696005f 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -5567,6 +5567,114 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+ 			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ 			      uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+ 
++static void
++bilinear_interpolate_line_sse2 (uint32_t *       out,
++                                const uint32_t * top,
++                                const uint32_t * bottom,
++                                int              wt,     /* weight of the 'top' row */
++                                int              wb,     /* weight of the 'bottom' row */
++                                pixman_fixed_t   x,      /* fixed-point source position */
++                                pixman_fixed_t   ux,     /* per-pixel increment of x */
++                                int              width)  /* output pixels to write */
++{
++    const __m128i xmm_wt = _mm_set_epi16 (wt, wt, wt, wt, wt, wt, wt, wt);
++    const __m128i xmm_wb = _mm_set_epi16 (wb, wb, wb, wb, wb, wb, wb, wb);
++    const __m128i xmm_xorc = _mm_set_epi16 (0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff);
++    const __m128i xmm_addc = _mm_set_epi16 (0, 0, 0, 0, 1, 1, 1, 1);
++    const __m128i xmm_ux = _mm_set_epi16 (ux, ux, ux, ux, ux, ux, ux, ux);
++    const __m128i xmm_zero = _mm_setzero_si128 ();
++    __m128i xmm_x = _mm_set_epi16 (x, x, x, x, x, x, x, x); /* 16-bit lanes: low 16 bits of x */
++    uint32_t pix1, pix2, pix3, pix4;
++    /* Produces one output pixel from the 2x2 block at x, then steps x by ux. */
++    #define INTERPOLATE_ONE_PIXEL(pix)						\
++    do {									\
++	__m128i xmm_wh, xmm_lo, xmm_hi, a;					\
++	/* fetch 2x2 pixel block into sse2 register */				\
++	uint32_t tl = top [pixman_fixed_to_int (x)];				\
++	uint32_t tr = top [pixman_fixed_to_int (x) + 1];			\
++	uint32_t bl = bottom [pixman_fixed_to_int (x)];				\
++	uint32_t br = bottom [pixman_fixed_to_int (x) + 1];			\
++	a = _mm_set_epi32 (tr, tl, br, bl);					\
++        x += ux;								\
++	/* vertical interpolation */						\
++	a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero),	\
++					    xmm_wt),				\
++			   _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero),	\
++					    xmm_wb));				\
++	/* calculate horizontal weights */					\
++	xmm_wh = _mm_add_epi16 (xmm_addc,					\
++				_mm_xor_si128 (xmm_xorc,			\
++					       _mm_srli_epi16 (xmm_x, 8)));	\
++	xmm_x = _mm_add_epi16 (xmm_x, xmm_ux);					\
++	/* horizontal interpolation */						\
++	xmm_lo = _mm_mullo_epi16 (a, xmm_wh);					\
++	xmm_hi = _mm_mulhi_epu16 (a, xmm_wh);					\
++	a = _mm_add_epi32 (_mm_unpacklo_epi16 (xmm_lo, xmm_hi),			\
++			   _mm_unpackhi_epi16 (xmm_lo, xmm_hi));		\
++	/* shift and pack the result */						\
++	a = _mm_srli_epi32 (a, 16);						\
++	a = _mm_packs_epi32 (a, a);						\
++	a = _mm_packus_epi16 (a, a);						\
++	pix = _mm_cvtsi128_si32 (a);						\
++    } while (0)
++    /* Four pixels per pass; width ends negative but its low two bits keep the leftover count. */
++    while ((width -= 4) >= 0)
++    {
++	INTERPOLATE_ONE_PIXEL (pix1);
++	INTERPOLATE_ONE_PIXEL (pix2);
++	INTERPOLATE_ONE_PIXEL (pix3);
++	INTERPOLATE_ONE_PIXEL (pix4);
++	*out++ = pix1;
++	*out++ = pix2;
++	*out++ = pix3;
++	*out++ = pix4;
++    }
++    if (width & 2)
++    {
++	INTERPOLATE_ONE_PIXEL (pix1);
++	INTERPOLATE_ONE_PIXEL (pix2);
++	*out++ = pix1;
++	*out++ = pix2;
++    }
++    if (width & 1)
++    {
++	INTERPOLATE_ONE_PIXEL (pix1);
++	*out = pix1;
++    }
++
++    #undef INTERPOLATE_ONE_PIXEL
++}
++
++static force_inline void
++scaled_bilinear_scanline_sse2_8888_8888_SRC (uint32_t *       dst,
++					     const uint32_t * mask,	/* unused here */
++					     const uint32_t * src_top,
++					     const uint32_t * src_bottom,
++					     int32_t          w,
++					     int              wt,
++					     int              wb,
++					     pixman_fixed_t   vx,
++					     pixman_fixed_t   unit_x,
++					     pixman_fixed_t   max_vx,	/* unused here */
++					     pixman_bool_t    zero_src)	/* unused here */
++{
++    bilinear_interpolate_line_sse2 (dst, src_top, src_bottom,	/* plain forward; note w goes last */
++				    wt, wb, vx, unit_x, w);
++}
++
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_cover_SRC,
++			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       COVER, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_pad_SRC,
++			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       PAD, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (sse2_8888_8888_none_SRC,
++			       scaled_bilinear_scanline_sse2_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       NONE, FALSE, FALSE)
++
+ static const pixman_fast_path_t sse2_fast_paths[] =
+ {
+     /* PIXMAN_OP_OVER */
+@@ -5668,6 +5776,10 @@ static const pixman_fast_path_t sse2_fast_paths[] =
+     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+ 
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, sse2_8888_8888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, sse2_8888_8888),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch b/recipes/xorg-lib/pixman-0.21.6/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
new file mode 100644
index 0000000000..4d411625ae
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0010-ARM-NEON-optimization-for-bilinear-scaled-src_8888_8.patch
@@ -0,0 +1,288 @@
+From 17feaa9c50bb8521b0366345efe181bd99754957 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Tue, 22 Feb 2011 18:45:03 +0200
+Subject: [PATCH 10/40] ARM: NEON optimization for bilinear scaled 'src_8888_8888'
+
+Initial NEON optimization for bilinear scaling. It can probably be
+improved further.
+
+Benchmark on ARM Cortex-A8:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=6.70 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=44.27 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |  197 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c     |   45 ++++++++++
+ 2 files changed, 242 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 47daf45..c168e10 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2391,3 +2391,200 @@ generate_composite_function_nearest_scanline \
+     10,  /* dst_r_basereg */ \
+     8,  /* src_basereg   */ \
+     15  /* mask_basereg  */
++
++/******************************************************************************/
++
++/* Supplementary macro for setting function attributes */
++.macro pixman_asm_function fname
++    .func fname
++    .global fname
++#ifdef __ELF__
++    .hidden fname
++    .type fname, %function
++#endif
++fname:
++.endm
++
++.macro bilinear_interpolate_last_pixel
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d0}, [TMP1]
++    vshr.u16  d30, d24, #8
++    vld1.32   {d1}, [TMP2]
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    /* 5 cycles bubble */
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    /* 5 cycles bubble */
++    vshrn.u32 d0, q0, #16
++    /* 3 cycles bubble */
++    vmovn.u16 d0, q0
++    /* 1 cycle bubble */
++    vst1.32   {d0[0]}, [OUT, :32]!
++.endm
++
++.macro bilinear_interpolate_two_pixels
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d0}, [TMP1]
++    vld1.32   {d1}, [TMP2]
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d20}, [TMP1]
++    vld1.32   {d21}, [TMP2]
++    vmull.u8  q11, d20, d28
++    vmlal.u8  q11, d21, d29
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    vshll.u16 q10, d22, #8
++    vmlsl.u16 q10, d22, d31
++    vmlal.u16 q10, d23, d31
++    vshrn.u32 d30, q0, #16
++    vshrn.u32 d31, q10, #16
++    vmovn.u16 d0, q15
++    vst1.32   {d0}, [OUT]!
++.endm
++
++.macro bilinear_interpolate_four_pixels
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d0}, [TMP1]
++    vld1.32   {d1}, [TMP2]
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d20}, [TMP1]
++    vld1.32   {d21}, [TMP2]
++    vmull.u8  q11, d20, d28
++    vmlal.u8  q11, d21, d29
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    vshll.u16 q10, d22, #8
++    vmlsl.u16 q10, d22, d31
++    vmlal.u16 q10, d23, d31
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d4}, [TMP1]
++    vld1.32   {d5}, [TMP2]
++    vmull.u8  q3, d4, d28
++    vmlal.u8  q3, d5, d29
++    mov       TMP1, X, asr #16
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {d16}, [TMP1]
++    vld1.32   {d17}, [TMP2]
++    vmull.u8  q9, d16, d28
++    vmlal.u8  q9, d17, d29
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q2, d6, #8
++    vmlsl.u16 q2, d6, d30
++    vmlal.u16 q2, d7, d30
++    vshll.u16 q8, d18, #8
++    vmlsl.u16 q8, d18, d31
++    vmlal.u16 q8, d19, d31
++    vshrn.u32 d0, q0, #16
++    vshrn.u32 d1, q10, #16
++    vshrn.u32 d4, q2, #16
++    vshrn.u32 d5, q8, #16
++    vmovn.u16 d0, q0
++    vmovn.u16 d1, q2
++    vst1.32   {d0, d1}, [OUT]!
++.endm
++
++
++/*
++ * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t *       out,
++ *                                                const uint32_t * top,
++ *                                                const uint32_t * bottom,
++ *                                                int              wt,
++ *                                                int              wb,
++ *                                                pixman_fixed_t   x,
++ *                                                pixman_fixed_t   ux,
++ *                                                int              width)
++ */
++
++pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
++    OUT       .req      r0
++    TOP       .req      r1
++    BOTTOM    .req      r2
++    WT        .req      r3
++    WB        .req      r4
++    X         .req      r5
++    UX        .req      r6
++    WIDTH     .req      ip
++    TMP1      .req      r3
++    TMP2      .req      r4
++
++    mov       ip, sp
++    push      {r4, r5, r6, r7}
++    ldmia     ip, {WB, X, UX, WIDTH}
++
++    cmp       WIDTH, #0
++    ble       3f
++    vdup.u16  q12, X
++    vdup.u16  q13, UX
++    vdup.u8   d28, WT
++    vdup.u8   d29, WB
++    vadd.u16  d25, d25, d26
++    vadd.u16  q13, q13, q13
++
++    subs      WIDTH, WIDTH, #4
++    blt       1f
++0:
++    bilinear_interpolate_four_pixels
++    subs      WIDTH, WIDTH, #4
++    bge       0b
++1:
++    tst       WIDTH, #2
++    beq       2f
++    bilinear_interpolate_two_pixels
++2:
++    tst       WIDTH, #1
++    beq       3f
++    bilinear_interpolate_last_pixel
++3:
++    pop       {r4, r5, r6, r7}
++    bx        lr
++
++    .unreq    OUT
++    .unreq    TOP
++    .unreq    BOTTOM
++    .unreq    WT
++    .unreq    WB
++    .unreq    X
++    .unreq    UX
++    .unreq    WIDTH
++    .unreq    TMP1
++    .unreq    TMP2
++.endfunc
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 3e0c0d1..c7c0254 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -232,6 +232,47 @@ pixman_blt_neon (uint32_t *src_bits,
+     }
+ }
+ 
++void
++pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t *       out,
++                                                        const uint32_t * top,
++                                                        const uint32_t * bottom,
++                                                        int              wt,
++                                                        int              wb,
++                                                        pixman_fixed_t   x,
++                                                        pixman_fixed_t   ux,
++                                                        int              width);
++
++static force_inline void
++scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t *       dst,
++					     const uint32_t * mask,
++					     const uint32_t * src_top,
++					     const uint32_t * src_bottom,
++					     int32_t          w,
++					     int              wt,
++					     int              wb,
++					     pixman_fixed_t   vx,
++					     pixman_fixed_t   unit_x,
++					     pixman_fixed_t   max_vx,
++					     pixman_bool_t    zero_src)
++{
++    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top,
++                                                            src_bottom, wt, wb,
++                                                            vx, unit_x, w);
++}
++
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC,
++			       scaled_bilinear_scanline_neon_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       COVER, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC,
++			       scaled_bilinear_scanline_neon_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       PAD, FALSE, FALSE)
++FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC,
++			       scaled_bilinear_scanline_neon_8888_8888_SRC,
++			       uint32_t, uint32_t, uint32_t,
++			       NONE, FALSE, FALSE)
++
+ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ {
+     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
+@@ -343,6 +384,10 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+ 
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, neon_8888_8888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch b/recipes/xorg-lib/pixman-0.21.6/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
new file mode 100644
index 0000000000..97618606b1
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0011-test-In-image_endian_swap-use-pixman_image_get_forma.patch
@@ -0,0 +1,156 @@
+From 84f3c5a71a2de1a96dcf0c7f9ab0a8ee1b1b158f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Mon, 7 Mar 2011 13:45:54 -0500
+Subject: [PATCH 11/40] test: In image_endian_swap() use pixman_image_get_format() to get the bpp.
+
+There is no reason to pass in the bpp as an argument; it can be gotten
+directly from the image.
+---
+ test/affine-test.c          |    6 +++---
+ test/blitters-test.c        |    4 ++--
+ test/composite-traps-test.c |    2 +-
+ test/scaling-test.c         |    6 +++---
+ test/utils.c                |    9 +++++++--
+ test/utils.h                |    2 +-
+ 6 files changed, 17 insertions(+), 12 deletions(-)
+
+diff --git a/test/affine-test.c b/test/affine-test.c
+index b7a1fa6..ed8000c 100644
+--- a/test/affine-test.c
++++ b/test/affine-test.c
+@@ -95,8 +95,8 @@ test_composite (int      testnum,
+     dst_img = pixman_image_create_bits (
+         dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
+ 
+-    image_endian_swap (src_img, src_bpp * 8);
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (src_img);
++    image_endian_swap (dst_img);
+ 
+     pixman_transform_init_identity (&transform);
+     
+@@ -251,7 +251,7 @@ test_composite (int      testnum,
+ 	    dstbuf[i] &= 0xFFFFFF;
+     }
+ 
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (dst_img);
+ 
+     if (verbose)
+     {
+diff --git a/test/blitters-test.c b/test/blitters-test.c
+index 42181ef..63e7cb3 100644
+--- a/test/blitters-test.c
++++ b/test/blitters-test.c
+@@ -61,7 +61,7 @@ create_random_image (pixman_format_code_t *allowed_formats,
+ 	pixman_image_set_indexed (img, &(y_palette[PIXMAN_FORMAT_BPP (fmt)]));
+     }
+ 
+-    image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
++    image_endian_swap (img);
+ 
+     if (used_fmt) *used_fmt = fmt;
+     return img;
+@@ -101,7 +101,7 @@ free_random_image (uint32_t initcrc,
+ 	/* swap endiannes in order to provide identical results on both big
+ 	 * and litte endian systems
+ 	 */
+-	image_endian_swap (img, PIXMAN_FORMAT_BPP (fmt));
++	image_endian_swap (img);
+ 	crc32 = compute_crc32 (initcrc, data, stride * height);
+     }
+ 
+diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
+index 8f32778..298537d 100644
+--- a/test/composite-traps-test.c
++++ b/test/composite-traps-test.c
+@@ -218,7 +218,7 @@ test_composite (int      testnum,
+ 	    dst_bits[i] &= 0xFFFFFF;
+     }
+ 
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (dst_img);
+ 
+     if (verbose)
+     {
+diff --git a/test/scaling-test.c b/test/scaling-test.c
+index dbb9d39..82370f7 100644
+--- a/test/scaling-test.c
++++ b/test/scaling-test.c
+@@ -140,8 +140,8 @@ test_composite (int      testnum,
+     dst_img = pixman_image_create_bits (
+         dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
+ 
+-    image_endian_swap (src_img, src_bpp * 8);
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (src_img);
++    image_endian_swap (dst_img);
+ 
+     if (lcg_rand_n (4) > 0)
+     {
+@@ -330,7 +330,7 @@ test_composite (int      testnum,
+ 	    dstbuf[i] &= 0xFFFFFF;
+     }
+ 
+-    image_endian_swap (dst_img, dst_bpp * 8);
++    image_endian_swap (dst_img);
+ 
+     if (verbose)
+     {
+diff --git a/test/utils.c b/test/utils.c
+index 2f21398..4bf02e1 100644
+--- a/test/utils.c
++++ b/test/utils.c
+@@ -133,11 +133,12 @@ compute_crc32 (uint32_t    in_crc32,
+ /* perform endian conversion of pixel data
+  */
+ void
+-image_endian_swap (pixman_image_t *img, int bpp)
++image_endian_swap (pixman_image_t *img)
+ {
+     int stride = pixman_image_get_stride (img);
+     uint32_t *data = pixman_image_get_data (img);
+     int height = pixman_image_get_height (img);
++    int bpp = PIXMAN_FORMAT_BPP (pixman_image_get_format (img));
+     int i, j;
+ 
+     /* swap bytes only on big endian systems */
+@@ -145,10 +146,13 @@ image_endian_swap (pixman_image_t *img, int bpp)
+     if (*(volatile uint8_t *)&endian_check_var != 0x12)
+ 	return;
+ 
++    if (bpp == 8)
++	return;
++
+     for (i = 0; i < height; i++)
+     {
+ 	uint8_t *line_data = (uint8_t *)data + stride * i;
+-	/* swap bytes only for 16, 24 and 32 bpp for now */
++	
+ 	switch (bpp)
+ 	{
+ 	case 1:
+@@ -208,6 +212,7 @@ image_endian_swap (pixman_image_t *img, int bpp)
+ 	    }
+ 	    break;
+ 	default:
++	    assert (FALSE);
+ 	    break;
+ 	}
+     }
+diff --git a/test/utils.h b/test/utils.h
+index 9c7bdb1..a5183f7 100644
+--- a/test/utils.h
++++ b/test/utils.h
+@@ -60,7 +60,7 @@ compute_crc32 (uint32_t    in_crc32,
+ /* perform endian conversion of pixel data
+  */
+ void
+-image_endian_swap (pixman_image_t *img, int bpp);
++image_endian_swap (pixman_image_t *img);
+ 
+ /* Allocate memory that is bounded by protected pages,
+  * so that out-of-bounds access will cause segfaults
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch b/recipes/xorg-lib/pixman-0.21.6/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
new file mode 100644
index 0000000000..9fc4cdde07
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0012-test-Do-endian-swapping-of-the-source-and-destinatio.patch
@@ -0,0 +1,36 @@
+From 84e361c8e357e26f299213fbeefe64c73447b116 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Fri, 4 Mar 2011 15:51:18 -0500
+Subject: [PATCH 12/40] test: Do endian swapping of the source and destination images.
+
+Otherwise the test fails on big endian. Fix for bug 34767, reported by
+Siarhei Siamashka.
+---
+ test/composite-traps-test.c |    4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
+index 298537d..cf30281 100644
+--- a/test/composite-traps-test.c
++++ b/test/composite-traps-test.c
+@@ -139,6 +139,8 @@ test_composite (int      testnum,
+ 	    pixman_image_set_source_clipping (src_img, 1);
+ 	    pixman_region_fini (&clip);
+ 	}
++
++	image_endian_swap (src_img);
+     }
+ 
+     /* Create destination image */
+@@ -157,6 +159,8 @@ test_composite (int      testnum,
+ 	
+ 	dst_img = pixman_image_create_bits (
+ 	    dst_format, dst_width, dst_height, dst_bits, dst_stride);
++
++	image_endian_swap (dst_img);
+     }
+ 
+     /* Create traps */
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch b/recipes/xorg-lib/pixman-0.21.6/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
new file mode 100644
index 0000000000..9d43404898
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0013-ARM-use-prefetch-in-nearest-scaled-src_0565_0565.patch
@@ -0,0 +1,77 @@
+From bb3d1b67fd0f42ae00af811c624ea1c44541034d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sun, 6 Mar 2011 16:17:12 +0200
+Subject: [PATCH 13/40] ARM: use prefetch in nearest scaled 'src_0565_0565'
+
+Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=10020565, speed=75.02 MPix/s
+  after:  op=1, src=10020565, dst=10020565, speed=73.63 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=10020565, speed=176.12 MPix/s
+  after:  op=1, src=10020565, dst=10020565, speed=267.50 MPix/s
+---
+ pixman/pixman-arm-simd-asm.S |   27 +++++++++++++++++++++++++--
+ 1 files changed, 25 insertions(+), 2 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index 7567700..dd1366d 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -348,6 +348,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	TMP1	.req	r4
+ 	TMP2	.req	r5
+ 	VXMASK	.req	r6
++	PF_OFFS	.req	r7
+ 
+ 	ldr	UNIT_X, [sp]
+ 	push	{r4, r5, r6, r7}
+@@ -366,12 +367,33 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 		strh	TMP2, [DST], #2
+ 	.endm
+ 
++	/*
++	 * stop prefetch before reaching the end of scanline (a good behaving
++	 * value selected based on some benchmarks with short scanlines)
++	 */
++	#define PREFETCH_BRAKING_DISTANCE 32
++
+ 	/* now do the scaling */
+ 	and	TMP1, VXMASK, VX, lsr #15
+ 	add	VX, VX, UNIT_X
+-	subs	W, #4
++	subs	W, #(8 + PREFETCH_BRAKING_DISTANCE)
++	blt	2f
++	/* set prefetch distance to 80 pixels ahead */
++	add	PF_OFFS, VX, UNIT_X, lsl #6
++	add	PF_OFFS, PF_OFFS, UNIT_X, lsl #4
++1:	/* main loop, process 8 pixels per iteration with prefetch */
++	subs	W, W, #8
++	add	PF_OFFS, UNIT_X, lsl #3
++	scale_2_pixels
++	scale_2_pixels
++	scale_2_pixels
++	scale_2_pixels
++	pld	[SRC, PF_OFFS, lsr #15]
++	bge	1b
++2:
++	subs	W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
+ 	blt	2f
+-1: /* main loop, process 4 pixels per iteration */
++1:	/* process the remaining pixels */
+ 	scale_2_pixels
+ 	scale_2_pixels
+ 	subs	W, W, #4
+@@ -394,6 +416,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	.unreq	TMP1
+ 	.unreq	TMP2
+ 	.unreq	VXMASK
++	.unreq	PF_OFFS
+ 	/* return */
+ 	pop	{r4, r5, r6, r7}
+ 	bx	lr
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch b/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
new file mode 100644
index 0000000000..115d5170c6
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0014-ARM-common-macro-for-nearest-scaling-fast-paths.patch
@@ -0,0 +1,131 @@
+From f3e17872f5522e25da8e32de83e62bee8cc198d7 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 7 Mar 2011 03:10:43 +0200
+Subject: [PATCH 14/40] ARM: common macro for nearest scaling fast paths
+
+The code of the nearest scaled 'src_0565_0565' function was generalized
+and moved to a common macro, so that it can be reused for other
+fast paths.
+---
+ pixman/pixman-arm-simd-asm.S |   60 +++++++++++++++++++++++++----------------
+ 1 files changed, 36 insertions(+), 24 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index dd1366d..a9775e2 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -331,15 +331,29 @@ pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+ .endfunc
+ 
+ /*
+- * Note: This function is only using armv4t instructions (not even armv6),
++ * Note: This code is only using armv5te instructions (not even armv6),
+  *       but is scheduled for ARM Cortex-A8 pipeline. So it might need to
+  *       be split into a few variants, tuned for each microarchitecture.
+  *
+  * TODO: In order to get good performance on ARM9/ARM11 cores (which don't
+  * have efficient write combining), it needs to be changed to use 16-byte
+  * aligned writes using STM instruction.
++ *
++ * Nearest scanline scaler macro template uses the following arguments:
++ *  fname                     - name of the function to generate
++ *  bpp_shift                 - (1 << bpp_shift) is the size of pixel in bytes
++ *  t                         - type suffix for LDR/STR instructions
++ *  prefetch_distance         - prefetch in the source image by that many
++ *                              pixels ahead
++ *  prefetch_braking_distance - stop prefetching when that many pixels are
++ *                              remaining before the end of scanline
+  */
+-pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
++
++.macro generate_nearest_scanline_func fname, bpp_shift, t,      \
++                                      prefetch_distance,        \
++                                      prefetch_braking_distance
++
++pixman_asm_function fname
+ 	W	.req	r0
+ 	DST	.req	r1
+ 	SRC	.req	r2
+@@ -352,35 +366,29 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 
+ 	ldr	UNIT_X, [sp]
+ 	push	{r4, r5, r6, r7}
+-	mvn	VXMASK, #1
++	mvn	VXMASK, #((1 << bpp_shift) - 1)
+ 
+ 	/* define helper macro */
+ 	.macro	scale_2_pixels
+-		ldrh	TMP1, [SRC, TMP1]
+-		and	TMP2, VXMASK, VX, lsr #15
++		ldr&t	TMP1, [SRC, TMP1]
++		and	TMP2, VXMASK, VX, lsr #(16 - bpp_shift)
+ 		add	VX, VX, UNIT_X
+-		strh	TMP1, [DST], #2
++		str&t	TMP1, [DST], #(1 << bpp_shift)
+ 
+-		ldrh	TMP2, [SRC, TMP2]
+-		and	TMP1, VXMASK, VX, lsr #15
++		ldr&t	TMP2, [SRC, TMP2]
++		and	TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+ 		add	VX, VX, UNIT_X
+-		strh	TMP2, [DST], #2
++		str&t	TMP2, [DST], #(1 << bpp_shift)
+ 	.endm
+ 
+-	/*
+-	 * stop prefetch before reaching the end of scanline (a good behaving
+-	 * value selected based on some benchmarks with short scanlines)
+-	 */
+-	#define PREFETCH_BRAKING_DISTANCE 32
+-
+ 	/* now do the scaling */
+-	and	TMP1, VXMASK, VX, lsr #15
++	and	TMP1, VXMASK, VX, lsr #(16 - bpp_shift)
+ 	add	VX, VX, UNIT_X
+-	subs	W, #(8 + PREFETCH_BRAKING_DISTANCE)
++	subs	W, W, #(8 + prefetch_braking_distance)
+ 	blt	2f
+-	/* set prefetch distance to 80 pixels ahead */
+-	add	PF_OFFS, VX, UNIT_X, lsl #6
+-	add	PF_OFFS, PF_OFFS, UNIT_X, lsl #4
++	/* calculate prefetch offset */
++	mov	PF_OFFS, #prefetch_distance
++	mla	PF_OFFS, UNIT_X, PF_OFFS, VX
+ 1:	/* main loop, process 8 pixels per iteration with prefetch */
+ 	subs	W, W, #8
+ 	add	PF_OFFS, UNIT_X, lsl #3
+@@ -388,10 +396,10 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	scale_2_pixels
+ 	scale_2_pixels
+ 	scale_2_pixels
+-	pld	[SRC, PF_OFFS, lsr #15]
++	pld	[SRC, PF_OFFS, lsr #(16 - bpp_shift)]
+ 	bge	1b
+ 2:
+-	subs	W, #(4 - 8 - PREFETCH_BRAKING_DISTANCE)
++	subs	W, W, #(4 - 8 - prefetch_braking_distance)
+ 	blt	2f
+ 1:	/* process the remaining pixels */
+ 	scale_2_pixels
+@@ -404,8 +412,8 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	scale_2_pixels
+ 2:
+ 	tst	W, #1
+-	ldrneh	TMP1, [SRC, TMP1]
+-	strneh	TMP1, [DST], #2
++	ldrne&t	TMP1, [SRC, TMP1]
++	strne&t	TMP1, [DST]
+ 	/* cleanup helper macro */
+ 	.purgem	scale_2_pixels
+ 	.unreq	DST
+@@ -421,3 +429,7 @@ pixman_asm_function pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6
+ 	pop	{r4, r5, r6, r7}
+ 	bx	lr
+ .endfunc
++.endm
++
++generate_nearest_scanline_func \
++    pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch b/recipes/xorg-lib/pixman-0.21.6/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
new file mode 100644
index 0000000000..cc3a68f06c
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0015-ARM-assembly-optimized-nearest-scaled-src_8888_8888.patch
@@ -0,0 +1,60 @@
+From 5921c17639fe5fdc595c850e3347281c1c8746ba Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Sun, 6 Mar 2011 22:16:32 +0200
+Subject: [PATCH 15/40] ARM: assembly optimized nearest scaled 'src_8888_8888'
+
+Benchmark on ARM Cortex-A8 r1p3 @500MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=44.36 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=39.79 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=102.36 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=163.12 MPix/s
+---
+ pixman/pixman-arm-simd-asm.S |    3 +++
+ pixman/pixman-arm-simd.c     |    9 +++++++++
+ 2 files changed, 12 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
+index a9775e2..858c690 100644
+--- a/pixman/pixman-arm-simd-asm.S
++++ b/pixman/pixman-arm-simd-asm.S
+@@ -433,3 +433,6 @@ pixman_asm_function fname
+ 
+ generate_nearest_scanline_func \
+     pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
++
++generate_nearest_scanline_func \
++    pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2,  , 48, 32
+diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
+index 6bbc109..a66f8df 100644
+--- a/pixman/pixman-arm-simd.c
++++ b/pixman/pixman-arm-simd.c
+@@ -389,6 +389,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, armv6, over_n_8_8888,
+ 
+ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 0565_0565, SRC,
+                                         uint16_t, uint16_t)
++PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (armv6, 8888_8888, SRC,
++                                        uint32_t, uint32_t)
+ 
+ static const pixman_fast_path_t arm_simd_fast_paths[] =
+ {
+@@ -411,6 +413,13 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
+     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, armv6_0565_0565),
+     PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, b5g6r5, b5g6r5, armv6_0565_0565),
+ 
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, armv6_8888_8888),
++    PIXMAN_ARM_SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch b/recipes/xorg-lib/pixman-0.21.6/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
new file mode 100644
index 0000000000..d8559b0c61
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0016-ARM-new-bilinear-fast-path-template-macro-in-pixman-.patch
@@ -0,0 +1,130 @@
+From 66f4ee1b3bccf4516433d61dbf2035551a712fa2 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 10:59:46 +0200
+Subject: [PATCH 16/40] ARM: new bilinear fast path template macro in 'pixman-arm-common.h'
+
+It can be reused in different ARM NEON bilinear scaling fast path functions.
+---
+ pixman/pixman-arm-common.h |   45 ++++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon.c   |   44 ++----------------------------------------
+ 2 files changed, 48 insertions(+), 41 deletions(-)
+
+diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
+index 9b1322b..c3bf986 100644
+--- a/pixman/pixman-arm-common.h
++++ b/pixman/pixman-arm-common.h
+@@ -361,4 +361,49 @@ FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                    \
+     SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
+     SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+ 
++/*****************************************************************************/
++
++#define PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST(flags, cputype, name, op,     \
++                                                src_type, dst_type)           \
++void                                                                          \
++pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (               \
++                                                dst_type *       dst,         \
++                                                const src_type * top,         \
++                                                const src_type * bottom,      \
++                                                int              wt,          \
++                                                int              wb,          \
++                                                pixman_fixed_t   x,           \
++                                                pixman_fixed_t   ux,          \
++                                                int              width);      \
++                                                                              \
++static force_inline void                                                      \
++scaled_bilinear_scanline_##cputype##_##name##_##op (                          \
++                                                dst_type *       dst,         \
++                                                const uint32_t * mask,        \
++                                                const src_type * src_top,     \
++                                                const src_type * src_bottom,  \
++                                                int32_t          w,           \
++                                                int              wt,          \
++                                                int              wb,          \
++                                                pixman_fixed_t   vx,          \
++                                                pixman_fixed_t   unit_x,      \
++                                                pixman_fixed_t   max_vx,      \
++                                                pixman_bool_t    zero_src)    \
++{                                                                             \
++    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
++	return;                                                               \
++    pixman_scaled_bilinear_scanline_##name##_##op##_asm_##cputype (           \
++                            dst, src_top, src_bottom, wt, wb, vx, unit_x, w); \
++}                                                                             \
++                                                                              \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                 \
++                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
++                       src_type, uint32_t, dst_type, COVER, FALSE, FALSE)     \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_none_##op,                  \
++                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
++                       src_type, uint32_t, dst_type, NONE, FALSE, FALSE)      \
++FAST_BILINEAR_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                   \
++                       scaled_bilinear_scanline_##cputype##_##name##_##op,    \
++                       src_type, uint32_t, dst_type, PAD, FALSE, FALSE)
++
+ #endif
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index c7c0254..98ad5f2 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -127,6 +127,9 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+                                            OVER, uint16_t, uint16_t)
+ 
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
++                                         uint32_t, uint32_t)
++
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t   w,
+                                    int32_t   h,
+@@ -232,47 +235,6 @@ pixman_blt_neon (uint32_t *src_bits,
+     }
+ }
+ 
+-void
+-pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (uint32_t *       out,
+-                                                        const uint32_t * top,
+-                                                        const uint32_t * bottom,
+-                                                        int              wt,
+-                                                        int              wb,
+-                                                        pixman_fixed_t   x,
+-                                                        pixman_fixed_t   ux,
+-                                                        int              width);
+-
+-static force_inline void
+-scaled_bilinear_scanline_neon_8888_8888_SRC (uint32_t *       dst,
+-					     const uint32_t * mask,
+-					     const uint32_t * src_top,
+-					     const uint32_t * src_bottom,
+-					     int32_t          w,
+-					     int              wt,
+-					     int              wb,
+-					     pixman_fixed_t   vx,
+-					     pixman_fixed_t   unit_x,
+-					     pixman_fixed_t   max_vx,
+-					     pixman_bool_t    zero_src)
+-{
+-    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon (dst, src_top,
+-                                                            src_bottom, wt, wb,
+-                                                            vx, unit_x, w);
+-}
+-
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_cover_SRC,
+-			       scaled_bilinear_scanline_neon_8888_8888_SRC,
+-			       uint32_t, uint32_t, uint32_t,
+-			       COVER, FALSE, FALSE)
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_pad_SRC,
+-			       scaled_bilinear_scanline_neon_8888_8888_SRC,
+-			       uint32_t, uint32_t, uint32_t,
+-			       PAD, FALSE, FALSE)
+-FAST_BILINEAR_MAINLOOP_COMMON (neon_8888_8888_none_SRC,
+-			       scaled_bilinear_scanline_neon_8888_8888_SRC,
+-			       uint32_t, uint32_t, uint32_t,
+-			       NONE, FALSE, FALSE)
+-
+ static const pixman_fast_path_t arm_neon_fast_paths[] =
+ {
+     PIXMAN_STD_FAST_PATH (SRC,  r5g6b5,   null,     r5g6b5,   neon_composite_src_0565_0565),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch b/recipes/xorg-lib/pixman-0.21.6/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
new file mode 100644
index 0000000000..6efc40f6cb
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0017-ARM-NEON-common-macro-template-for-bilinear-scanline.patch
@@ -0,0 +1,271 @@
+From 34098dba6763afd3636a14f9c2a079ab08f23b2d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:34:15 +0200
+Subject: [PATCH 17/40] ARM: NEON: common macro template for bilinear scanline scalers
+
+This allows generating bilinear scanline scaling functions targeting
+various source and destination color formats. Right now a8r8g8b8/x8r8g8b8
+and r5g6b5 color formats are supported. More formats can be added if needed.
+---
+ pixman/pixman-arm-neon-asm.S |  222 ++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman-arm-neon-asm.h |   17 +++
+ 2 files changed, 239 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index c168e10..f3784f5 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2588,3 +2588,225 @@ pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
+     .unreq    TMP1
+     .unreq    TMP2
+ .endfunc
++
++.purgem bilinear_interpolate_last_pixel
++.purgem bilinear_interpolate_two_pixels
++.purgem bilinear_interpolate_four_pixels
++
++/*
++ * Bilinear scaling support code which tries to provide pixel fetching, color
++ * format conversion, and interpolation as separate macros which can be used
++ * as the basic building blocks for constructing bilinear scanline functions.
++ */
++
++.macro bilinear_load_8888 reg1, reg2, tmp
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #2
++    add       TMP2, BOTTOM, TMP2, asl #2
++    vld1.32   {reg1}, [TMP1]
++    vld1.32   {reg2}, [TMP2]
++.endm
++
++.macro bilinear_load_0565 reg1, reg2, tmp
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #1
++    add       TMP2, BOTTOM, TMP2, asl #1
++    vld1.32   {reg2[0]}, [TMP1]
++    vld1.32   {reg2[1]}, [TMP2]
++    convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
++.endm
++
++.macro bilinear_store_8888 numpix, tmp1, tmp2
++.if numpix == 4
++    vst1.32   {d0, d1}, [OUT]!
++.elseif numpix == 2
++    vst1.32   {d0}, [OUT]!
++.elseif numpix == 1
++    vst1.32   {d0[0]}, [OUT, :32]!
++.else
++    .error bilinear_store_8888 numpix is unsupported
++.endif
++.endm
++
++.macro bilinear_store_0565 numpix, tmp1, tmp2
++    vuzp.u8 d0, d1
++    vuzp.u8 d2, d3
++    vuzp.u8 d1, d3
++    vuzp.u8 d0, d2
++    convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
++.if numpix == 4
++    vst1.16   {d2}, [OUT]!
++.elseif numpix == 2
++    vst1.32   {d2[0]}, [OUT]!
++.elseif numpix == 1
++    vst1.16   {d2[0]}, [OUT]!
++.else
++    .error bilinear_store_0565 numpix is unsupported
++.endif
++.endm
++
++.macro bilinear_interpolate_last_pixel src_fmt, dst_fmt
++    bilinear_load_&src_fmt d0, d1, d2
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    vshr.u16  d30, d24, #8
++    /* 4 cycles bubble */
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    /* 5 cycles bubble */
++    vshrn.u32 d0, q0, #16
++    /* 3 cycles bubble */
++    vmovn.u16 d0, q0
++    /* 1 cycle bubble */
++    bilinear_store_&dst_fmt 1, q2, q3
++.endm
++
++.macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
++    bilinear_load_&src_fmt d0, d1, d2
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    bilinear_load_&src_fmt d20, d21, d22
++    vmull.u8  q11, d20, d28
++    vmlal.u8  q11, d21, d29
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    vshll.u16 q10, d22, #8
++    vmlsl.u16 q10, d22, d31
++    vmlal.u16 q10, d23, d31
++    vshrn.u32 d30, q0, #16
++    vshrn.u32 d31, q10, #16
++    vmovn.u16 d0, q15
++    bilinear_store_&dst_fmt 2, q2, q3
++.endm
++
++.macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
++    bilinear_load_&src_fmt d0, d1, d2
++    vmull.u8  q1, d0, d28
++    vmlal.u8  q1, d1, d29
++    bilinear_load_&src_fmt d20, d21, d22
++    vmull.u8  q11, d20, d28
++    vmlal.u8  q11, d21, d29
++    bilinear_load_&src_fmt d4, d5, d6
++    vmull.u8  q3, d4, d28
++    vmlal.u8  q3, d5, d29
++    bilinear_load_&src_fmt d16, d17, d18
++    vmull.u8  q9, d16, d28
++    vmlal.u8  q9, d17, d29
++    pld       [TMP1, PF_OFFS]
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vshll.u16 q0, d2, #8
++    vmlsl.u16 q0, d2, d30
++    vmlal.u16 q0, d3, d30
++    vshll.u16 q10, d22, #8
++    vmlsl.u16 q10, d22, d31
++    vmlal.u16 q10, d23, d31
++    vshr.u16  q15, q12, #8
++    vshll.u16 q2, d6, #8
++    vmlsl.u16 q2, d6, d30
++    vmlal.u16 q2, d7, d30
++    vshll.u16 q8, d18, #8
++    pld       [TMP2, PF_OFFS]
++    vmlsl.u16 q8, d18, d31
++    vmlal.u16 q8, d19, d31
++    vadd.u16  q12, q12, q13
++    vshrn.u32 d0, q0, #16
++    vshrn.u32 d1, q10, #16
++    vshrn.u32 d4, q2, #16
++    vshrn.u32 d5, q8, #16
++    vmovn.u16 d0, q0
++    vmovn.u16 d1, q2
++    bilinear_store_&dst_fmt 4, q2, q3
++.endm
++
++/*
++ * Main template macro for generating NEON optimized bilinear scanline
++ * functions.
++ *
++ * TODO: use software pipelining and aligned writes to the destination buffer
++ *       in order to improve performance
++ *
++ * Bilinear scanline scaler macro template uses the following arguments:
++ *  fname             - name of the function to generate
++ *  src_fmt           - source color format (8888 or 0565)
++ *  dst_fmt           - destination color format (8888 or 0565)
++ *  bpp_shift         - (1 << bpp_shift) is the size of source pixel in bytes
++ *  prefetch_distance - prefetch in the source image by that many
++ *                      pixels ahead
++ */
++
++.macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
++                                       bpp_shift, prefetch_distance
++
++pixman_asm_function fname
++    OUT       .req      r0
++    TOP       .req      r1
++    BOTTOM    .req      r2
++    WT        .req      r3
++    WB        .req      r4
++    X         .req      r5
++    UX        .req      r6
++    WIDTH     .req      ip
++    TMP1      .req      r3
++    TMP2      .req      r4
++    PF_OFFS   .req      r7
++    TMP3      .req      r8
++    TMP4      .req      r9
++
++    mov       ip, sp
++    push      {r4, r5, r6, r7, r8, r9}
++    mov       PF_OFFS, #prefetch_distance
++    ldmia     ip, {WB, X, UX, WIDTH}
++    mul       PF_OFFS, PF_OFFS, UX
++
++    cmp       WIDTH, #0
++    ble       3f
++
++    vdup.u16  q12, X
++    vdup.u16  q13, UX
++    vdup.u8   d28, WT
++    vdup.u8   d29, WB
++    vadd.u16  d25, d25, d26
++    vadd.u16  q13, q13, q13
++
++    subs      WIDTH, WIDTH, #4
++    blt       1f
++    mov       PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
++0:
++    bilinear_interpolate_four_pixels src_fmt, dst_fmt
++    subs      WIDTH, WIDTH, #4
++    bge       0b
++1:
++    tst       WIDTH, #2
++    beq       2f
++    bilinear_interpolate_two_pixels src_fmt, dst_fmt
++2:
++    tst       WIDTH, #1
++    beq       3f
++    bilinear_interpolate_last_pixel src_fmt, dst_fmt
++3:
++    pop       {r4, r5, r6, r7, r8, r9}
++    bx        lr
++
++    .unreq    OUT
++    .unreq    TOP
++    .unreq    BOTTOM
++    .unreq    WT
++    .unreq    WB
++    .unreq    X
++    .unreq    UX
++    .unreq    WIDTH
++    .unreq    TMP1
++    .unreq    TMP2
++    .unreq    PF_OFFS
++    .unreq    TMP3
++    .unreq    TMP4
++.endfunc
++
++.endm
+diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
+index 24fa361..97adc6a 100644
+--- a/pixman/pixman-arm-neon-asm.h
++++ b/pixman/pixman-arm-neon-asm.h
+@@ -1158,3 +1158,20 @@ fname:
+     vsri.u16    out, tmp1, #5
+     vsri.u16    out, tmp2, #11
+ .endm
++
++/*
++ * Conversion of four r5g6b5 pixels (in) to four x8r8g8b8 pixels
++ * returned in (out0, out1) registers pair. Requires one temporary
++ * 64-bit register (tmp). 'out1' and 'in' may overlap, the original
++ * value from 'in' is lost
++ */
++.macro convert_four_0565_to_x888_packed in, out0, out1, tmp
++    vshl.u16    out0, in,   #5  /* G top 6 bits */
++    vshl.u16    tmp,  in,   #11 /* B top 5 bits */
++    vsri.u16    in,   in,   #5  /* R is ready in top bits */
++    vsri.u16    out0, out0, #6  /* G is ready in top bits */
++    vsri.u16    tmp,  tmp,  #5  /* B is ready in top bits */
++    vshr.u16    out1, in,   #8  /* R is in place */
++    vsri.u16    out0, tmp,  #8  /* G & B is in place */
++    vzip.u16    out0, out1      /* everything is in place */
++.endm
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch b/recipes/xorg-lib/pixman-0.21.6/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
new file mode 100644
index 0000000000..245e536716
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0018-ARM-use-common-macro-template-for-bilinear-scaled-sr.patch
@@ -0,0 +1,226 @@
+From 11a0c5badbc59ce967707ef836313cc98f8aec4e Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:46:48 +0200
+Subject: [PATCH 18/40] ARM: use common macro template for bilinear scaled 'src_8888_8888'
+
+This is a cleanup of old and now duplicated code. The performance improvement
+comes mostly from the newly enabled software prefetch, but instruction
+scheduling is also slightly better.
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=53.24 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=74.36 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |  191 +-----------------------------------------
+ 1 files changed, 3 insertions(+), 188 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index f3784f5..52dc444 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2405,194 +2405,6 @@ generate_composite_function_nearest_scanline \
+ fname:
+ .endm
+ 
+-.macro bilinear_interpolate_last_pixel
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d0}, [TMP1]
+-    vshr.u16  d30, d24, #8
+-    vld1.32   {d1}, [TMP2]
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    /* 5 cycles bubble */
+-    vshll.u16 q0, d2, #8
+-    vmlsl.u16 q0, d2, d30
+-    vmlal.u16 q0, d3, d30
+-    /* 5 cycles bubble */
+-    vshrn.u32 d0, q0, #16
+-    /* 3 cycles bubble */
+-    vmovn.u16 d0, q0
+-    /* 1 cycle bubble */
+-    vst1.32   {d0[0]}, [OUT, :32]!
+-.endm
+-
+-.macro bilinear_interpolate_two_pixels
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d0}, [TMP1]
+-    vld1.32   {d1}, [TMP2]
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d20}, [TMP1]
+-    vld1.32   {d21}, [TMP2]
+-    vmull.u8  q11, d20, d28
+-    vmlal.u8  q11, d21, d29
+-    vshr.u16  q15, q12, #8
+-    vadd.u16  q12, q12, q13
+-    vshll.u16 q0, d2, #8
+-    vmlsl.u16 q0, d2, d30
+-    vmlal.u16 q0, d3, d30
+-    vshll.u16 q10, d22, #8
+-    vmlsl.u16 q10, d22, d31
+-    vmlal.u16 q10, d23, d31
+-    vshrn.u32 d30, q0, #16
+-    vshrn.u32 d31, q10, #16
+-    vmovn.u16 d0, q15
+-    vst1.32   {d0}, [OUT]!
+-.endm
+-
+-.macro bilinear_interpolate_four_pixels
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d0}, [TMP1]
+-    vld1.32   {d1}, [TMP2]
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d20}, [TMP1]
+-    vld1.32   {d21}, [TMP2]
+-    vmull.u8  q11, d20, d28
+-    vmlal.u8  q11, d21, d29
+-    vshr.u16  q15, q12, #8
+-    vadd.u16  q12, q12, q13
+-    vshll.u16 q0, d2, #8
+-    vmlsl.u16 q0, d2, d30
+-    vmlal.u16 q0, d3, d30
+-    vshll.u16 q10, d22, #8
+-    vmlsl.u16 q10, d22, d31
+-    vmlal.u16 q10, d23, d31
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d4}, [TMP1]
+-    vld1.32   {d5}, [TMP2]
+-    vmull.u8  q3, d4, d28
+-    vmlal.u8  q3, d5, d29
+-    mov       TMP1, X, asr #16
+-    mov       TMP2, X, asr #16
+-    add       X, X, UX
+-    add       TMP1, TOP, TMP1, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {d16}, [TMP1]
+-    vld1.32   {d17}, [TMP2]
+-    vmull.u8  q9, d16, d28
+-    vmlal.u8  q9, d17, d29
+-    vshr.u16  q15, q12, #8
+-    vadd.u16  q12, q12, q13
+-    vshll.u16 q2, d6, #8
+-    vmlsl.u16 q2, d6, d30
+-    vmlal.u16 q2, d7, d30
+-    vshll.u16 q8, d18, #8
+-    vmlsl.u16 q8, d18, d31
+-    vmlal.u16 q8, d19, d31
+-    vshrn.u32 d0, q0, #16
+-    vshrn.u32 d1, q10, #16
+-    vshrn.u32 d4, q2, #16
+-    vshrn.u32 d5, q8, #16
+-    vmovn.u16 d0, q0
+-    vmovn.u16 d1, q2
+-    vst1.32   {d0, d1}, [OUT]!
+-.endm
+-
+-
+-/*
+- * pixman_scaled_bilinear_scanline_8888_8888_SRC (uint32_t *       out,
+- *                                                const uint32_t * top,
+- *                                                const uint32_t * bottom,
+- *                                                int              wt,
+- *                                                int              wb,
+- *                                                pixman_fixed_t   x,
+- *                                                pixman_fixed_t   ux,
+- *                                                int              width)
+- */
+-
+-pixman_asm_function pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon
+-    OUT       .req      r0
+-    TOP       .req      r1
+-    BOTTOM    .req      r2
+-    WT        .req      r3
+-    WB        .req      r4
+-    X         .req      r5
+-    UX        .req      r6
+-    WIDTH     .req      ip
+-    TMP1      .req      r3
+-    TMP2      .req      r4
+-
+-    mov       ip, sp
+-    push      {r4, r5, r6, r7}
+-    ldmia     ip, {WB, X, UX, WIDTH}
+-
+-    cmp       WIDTH, #0
+-    ble       3f
+-    vdup.u16  q12, X
+-    vdup.u16  q13, UX
+-    vdup.u8   d28, WT
+-    vdup.u8   d29, WB
+-    vadd.u16  d25, d25, d26
+-    vadd.u16  q13, q13, q13
+-
+-    subs      WIDTH, WIDTH, #4
+-    blt       1f
+-0:
+-    bilinear_interpolate_four_pixels
+-    subs      WIDTH, WIDTH, #4
+-    bge       0b
+-1:
+-    tst       WIDTH, #2
+-    beq       2f
+-    bilinear_interpolate_two_pixels
+-2:
+-    tst       WIDTH, #1
+-    beq       3f
+-    bilinear_interpolate_last_pixel
+-3:
+-    pop       {r4, r5, r6, r7}
+-    bx        lr
+-
+-    .unreq    OUT
+-    .unreq    TOP
+-    .unreq    BOTTOM
+-    .unreq    WT
+-    .unreq    WB
+-    .unreq    X
+-    .unreq    UX
+-    .unreq    WIDTH
+-    .unreq    TMP1
+-    .unreq    TMP2
+-.endfunc
+-
+-.purgem bilinear_interpolate_last_pixel
+-.purgem bilinear_interpolate_two_pixels
+-.purgem bilinear_interpolate_four_pixels
+-
+ /*
+  * Bilinear scaling support code which tries to provide pixel fetching, color
+  * format conversion, and interpolation as separate macros which can be used
+@@ -2810,3 +2622,6 @@ pixman_asm_function fname
+ .endfunc
+ 
+ .endm
++
++generate_bilinear_scanline_func \
++    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch b/recipes/xorg-lib/pixman-0.21.6/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
new file mode 100644
index 0000000000..cc1769404f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0019-ARM-NEON-optimization-for-bilinear-scaled-src_8888_0.patch
@@ -0,0 +1,51 @@
+From 2ee27e7d79637da9173ee1bf3423e5a81534ccb4 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 11:53:04 +0200
+Subject: [PATCH 19/40] ARM: NEON optimization for bilinear scaled 'src_8888_0565'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=10020565, speed=6.56 MPix/s
+  after:  op=1, src=20028888, dst=10020565, speed=61.65 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |    3 +++
+ pixman/pixman-arm-neon.c     |    5 +++++
+ 2 files changed, 8 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 52dc444..f0b42ca 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2625,3 +2625,6 @@ pixman_asm_function fname
+ 
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
++
++generate_bilinear_scanline_func \
++    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 98ad5f2..ba6de66 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -129,6 +129,8 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+ 
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+                                          uint32_t, uint32_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
++                                         uint32_t, uint16_t)
+ 
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t   w,
+@@ -350,6 +352,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, neon_8888_8888),
+     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, neon_8888_8888),
+ 
++    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch b/recipes/xorg-lib/pixman-0.21.6/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
new file mode 100644
index 0000000000..1924b3ae02
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0020-ARM-NEON-optimization-for-bilinear-scaled-src_0565_x.patch
@@ -0,0 +1,50 @@
+From 29003c3befe2159396d181ef9ac1caaadcabf382 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:21:53 +0200
+Subject: [PATCH 20/40] ARM: NEON optimization for bilinear scaled 'src_0565_x888'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=20020888, speed=3.39 MPix/s
+  after:  op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |    3 +++
+ pixman/pixman-arm-neon.c     |    4 ++++
+ 2 files changed, 7 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index f0b42ca..9245db9 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2628,3 +2628,6 @@ generate_bilinear_scanline_func \
+ 
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
++
++generate_bilinear_scanline_func \
++    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index ba6de66..18e26eb 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -131,6 +131,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_8888, SRC,
+                                          uint32_t, uint32_t)
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+                                          uint32_t, uint16_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
++                                         uint16_t, uint32_t)
+ 
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t   w,
+@@ -355,6 +357,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, neon_8888_0565),
+     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+ 
++    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
++
+     { PIXMAN_OP_NONE },
+ };
+ 
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch b/recipes/xorg-lib/pixman-0.21.6/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
new file mode 100644
index 0000000000..a0193d1fd6
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0021-ARM-NEON-optimization-for-bilinear-scaled-src_0565_0.patch
@@ -0,0 +1,49 @@
+From fe99673719091d4a880d031add1369332a75731b Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:27:41 +0200
+Subject: [PATCH 21/40] ARM: NEON optimization for bilinear scaled 'src_0565_0565'
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=10020565, speed=3.30 MPix/s
+  after:  op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |    3 +++
+ pixman/pixman-arm-neon.c     |    3 +++
+ 2 files changed, 6 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 9245db9..2b6875b 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2631,3 +2631,6 @@ generate_bilinear_scanline_func \
+ 
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
++
++generate_bilinear_scanline_func \
++    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 18e26eb..0a10ca1 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -133,6 +133,8 @@ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 8888_0565, SRC,
+                                          uint32_t, uint16_t)
+ PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_x888, SRC,
+                                          uint16_t, uint32_t)
++PIXMAN_ARM_BIND_SCALED_BILINEAR_SRC_DST (0, neon, 0565_0565, SRC,
++                                         uint16_t, uint16_t)
+ 
+ void
+ pixman_composite_src_n_8_asm_neon (int32_t   w,
+@@ -358,6 +360,7 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
+     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, neon_8888_0565),
+ 
+     SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, neon_0565_x888),
++    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, neon_0565_0565),
+ 
+     { PIXMAN_OP_NONE },
+ };
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch b/recipes/xorg-lib/pixman-0.21.6/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
new file mode 100644
index 0000000000..20019f45f1
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0022-ARM-a-bit-faster-NEON-bilinear-scaling-for-r5g6b5-so.patch
@@ -0,0 +1,166 @@
+From 70a923882ca24664344ba91a649e7aa12c3063f7 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 9 Mar 2011 13:55:48 +0200
+Subject: [PATCH 22/40] ARM: a bit faster NEON bilinear scaling for r5g6b5 source images
+
+Instruction scheduling is improved in the code responsible for fetching r5g6b5
+pixels and converting them to the intermediate x8r8g8b8 color format used in
+the interpolation part of the code. A lot of NEON stalls still remain,
+which can be resolved later by the use of pipelining.
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=10020565, dst=10020565, speed=32.29 MPix/s
+          op=1, src=10020565, dst=20020888, speed=36.82 MPix/s
+  after:  op=1, src=10020565, dst=10020565, speed=41.35 MPix/s
+          op=1, src=10020565, dst=20020888, speed=49.16 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |  118 +++++++++++++++++++++++++++++++++++------
+ 1 files changed, 100 insertions(+), 18 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 2b6875b..71b30ac 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2430,6 +2430,101 @@ fname:
+     convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+ .endm
+ 
++.macro bilinear_load_and_vertical_interpolate_two_8888 \
++                    acc1, acc2, reg1, reg2, reg3, reg4, tmp1, tmp2
++
++    bilinear_load_8888 reg1, reg2, tmp1
++    vmull.u8  acc1, reg1, d28
++    vmlal.u8  acc1, reg2, d29
++    bilinear_load_8888 reg3, reg4, tmp2
++    vmull.u8  acc2, reg3, d28
++    vmlal.u8  acc2, reg4, d29
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_four_8888 \
++                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
++                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++
++    bilinear_load_and_vertical_interpolate_two_8888 \
++                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi
++    bilinear_load_and_vertical_interpolate_two_8888 \
++                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_two_0565 \
++                acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
++
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #1
++    add       TMP2, BOTTOM, TMP2, asl #1
++    add       TMP3, TOP, TMP4, asl #1
++    add       TMP4, BOTTOM, TMP4, asl #1
++    vld1.32   {acc2lo[0]}, [TMP1]
++    vld1.32   {acc2hi[0]}, [TMP3]
++    vld1.32   {acc2lo[1]}, [TMP2]
++    vld1.32   {acc2hi[1]}, [TMP4]
++    convert_0565_to_x888 acc2, reg3, reg2, reg1
++    vzip.u8   reg1, reg3
++    vzip.u8   reg2, reg4
++    vzip.u8   reg3, reg4
++    vzip.u8   reg1, reg2
++    vmull.u8  acc1, reg1, d28
++    vmlal.u8  acc1, reg2, d29
++    vmull.u8  acc2, reg3, d28
++    vmlal.u8  acc2, reg4, d29
++.endm
++
++.macro bilinear_load_and_vertical_interpolate_four_0565 \
++                xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
++                yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
++
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #1
++    add       TMP2, BOTTOM, TMP2, asl #1
++    add       TMP3, TOP, TMP4, asl #1
++    add       TMP4, BOTTOM, TMP4, asl #1
++    vld1.32   {xacc2lo[0]}, [TMP1]
++    vld1.32   {xacc2hi[0]}, [TMP3]
++    vld1.32   {xacc2lo[1]}, [TMP2]
++    vld1.32   {xacc2hi[1]}, [TMP4]
++    convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP2, asl #1
++    add       TMP2, BOTTOM, TMP2, asl #1
++    add       TMP3, TOP, TMP4, asl #1
++    add       TMP4, BOTTOM, TMP4, asl #1
++    vld1.32   {yacc2lo[0]}, [TMP1]
++    vzip.u8   xreg1, xreg3
++    vld1.32   {yacc2hi[0]}, [TMP3]
++    vzip.u8   xreg2, xreg4
++    vld1.32   {yacc2lo[1]}, [TMP2]
++    vzip.u8   xreg3, xreg4
++    vld1.32   {yacc2hi[1]}, [TMP4]
++    vzip.u8   xreg1, xreg2
++    convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
++    vmull.u8  xacc1, xreg1, d28
++    vzip.u8   yreg1, yreg3
++    vmlal.u8  xacc1, xreg2, d29
++    vzip.u8   yreg2, yreg4
++    vmull.u8  xacc2, xreg3, d28
++    vzip.u8   yreg3, yreg4
++    vmlal.u8  xacc2, xreg4, d29
++    vzip.u8   yreg1, yreg2
++    vmull.u8  yacc1, yreg1, d28
++    vmlal.u8  yacc1, yreg2, d29
++    vmull.u8  yacc2, yreg3, d28
++    vmlal.u8  yacc2, yreg4, d29
++.endm
++
+ .macro bilinear_store_8888 numpix, tmp1, tmp2
+ .if numpix == 4
+     vst1.32   {d0, d1}, [OUT]!
+@@ -2477,12 +2572,8 @@ fname:
+ .endm
+ 
+ .macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
+-    bilinear_load_&src_fmt d0, d1, d2
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    bilinear_load_&src_fmt d20, d21, d22
+-    vmull.u8  q11, d20, d28
+-    vmlal.u8  q11, d21, d29
++    bilinear_load_and_vertical_interpolate_two_&src_fmt \
++                q1, q11, d0, d1, d20, d21, d22, d23
+     vshr.u16  q15, q12, #8
+     vadd.u16  q12, q12, q13
+     vshll.u16 q0, d2, #8
+@@ -2498,18 +2589,9 @@ fname:
+ .endm
+ 
+ .macro bilinear_interpolate_four_pixels src_fmt, dst_fmt
+-    bilinear_load_&src_fmt d0, d1, d2
+-    vmull.u8  q1, d0, d28
+-    vmlal.u8  q1, d1, d29
+-    bilinear_load_&src_fmt d20, d21, d22
+-    vmull.u8  q11, d20, d28
+-    vmlal.u8  q11, d21, d29
+-    bilinear_load_&src_fmt d4, d5, d6
+-    vmull.u8  q3, d4, d28
+-    vmlal.u8  q3, d5, d29
+-    bilinear_load_&src_fmt d16, d17, d18
+-    vmull.u8  q9, d16, d28
+-    vmlal.u8  q9, d17, d29
++    bilinear_load_and_vertical_interpolate_four_&src_fmt \
++                q1, q11, d0, d1, d20, d21, d22, d23 \
++                q3, q9,  d4, d5, d16, d17, d18, d19
+     pld       [TMP1, PF_OFFS]
+     vshr.u16  q15, q12, #8
+     vadd.u16  q12, q12, q13
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0023-In-delegate_-src-dest-_iter_init-call-delegate-direc.patch b/recipes/xorg-lib/pixman-0.21.6/0023-In-delegate_-src-dest-_iter_init-call-delegate-direc.patch
new file mode 100644
index 0000000000..96343f185b
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0023-In-delegate_-src-dest-_iter_init-call-delegate-direc.patch
@@ -0,0 +1,54 @@
+From be4eaa0e4f79af38b7b89c5b09ca88d3a88d9396 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Sat, 12 Mar 2011 19:06:02 -0500
+Subject: [PATCH 23/40] In delegate_{src,dest}_iter_init() call delegate directly.
+
+There is no reason to go through
+_pixman_implementation_{src,dest}_iter_init(), especially since
+_pixman_implementation_src_iter_init() is doing various other checks
+that only need to be done once.
+
+Also call delegate->src_iter_init() directly in pixman-sse2.c
+---
+ pixman/pixman-implementation.c |    4 ++--
+ pixman/pixman-sse2.c           |    2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
+index adaf9c6..892767e 100644
+--- a/pixman/pixman-implementation.c
++++ b/pixman/pixman-implementation.c
+@@ -122,7 +122,7 @@ delegate_src_iter_init (pixman_implementation_t *imp,
+ 			uint8_t *		 buffer,
+ 			iter_flags_t             flags)
+ {
+-    _pixman_implementation_src_iter_init (
++    imp->delegate->src_iter_init (
+ 	imp->delegate, iter, image, x, y, width, height, buffer, flags);
+ }
+ 
+@@ -137,7 +137,7 @@ delegate_dest_iter_init (pixman_implementation_t *imp,
+ 			 uint8_t *		  buffer,
+ 			 iter_flags_t             flags)
+ {
+-    _pixman_implementation_dest_iter_init (
++    imp->delegate->dest_iter_init (
+ 	imp->delegate, iter, image, x, y, width, height, buffer, flags);
+ }
+ 
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 696005f..d4a34e9 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -6013,7 +6013,7 @@ sse2_src_iter_init (pixman_implementation_t *imp,
+ 	}
+     }
+ 
+-    _pixman_implementation_src_iter_init (
++    imp->delegate->src_iter_init (
+ 	imp->delegate, iter, image, x, y, width, height, buffer, flags);
+ }
+ 
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0024-Fill-out-parts-of-iters-in-_pixman_implementation_-s.patch b/recipes/xorg-lib/pixman-0.21.6/0024-Fill-out-parts-of-iters-in-_pixman_implementation_-s.patch
new file mode 100644
index 0000000000..44fd38a8da
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0024-Fill-out-parts-of-iters-in-_pixman_implementation_-s.patch
@@ -0,0 +1,111 @@
+From 74d0f44b6d6d613d24541b849835da0464cc6fd0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Sat, 12 Mar 2011 19:12:35 -0500
+Subject: [PATCH 24/40] Fill out parts of iters in _pixman_implementation_{src,dest}_iter_init()
+
+This makes _pixman_implementation_{src,dest}_iter_init() responsible
+for filling parts of the information in the iterators. Specifically,
+the information passed as arguments is stored in the iterator.
+
+Also add height and flags fields to pixman_iter_t.
+---
+ pixman/pixman-general.c        |    6 ------
+ pixman/pixman-implementation.c |   16 ++++++++++++++++
+ pixman/pixman-private.h        |   11 ++++++++---
+ pixman/pixman-sse2.c           |    2 --
+ 4 files changed, 24 insertions(+), 11 deletions(-)
+
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 872fb7e..1a0fa7c 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -44,12 +44,6 @@ general_src_iter_init (pixman_implementation_t *imp,
+ 		       int x, int y, int width, int height,
+ 		       uint8_t *buffer, iter_flags_t flags)
+ {
+-    iter->image = image;
+-    iter->x = x;
+-    iter->y = y;
+-    iter->width = width;
+-    iter->buffer = (uint32_t *)buffer;
+-
+     if (image->type == SOLID)
+     {
+ 	_pixman_solid_fill_iter_init (
+diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
+index 892767e..bdd4543 100644
+--- a/pixman/pixman-implementation.c
++++ b/pixman/pixman-implementation.c
+@@ -274,6 +274,14 @@ _pixman_implementation_src_iter_init (pixman_implementation_t	*imp,
+ 				      uint8_t			*buffer,
+ 				      iter_flags_t		 flags)
+ {
++    iter->image = image;
++    iter->buffer = (uint32_t *)buffer;
++    iter->x = x;
++    iter->y = y;
++    iter->width = width;
++    iter->height = height;
++    iter->flags = flags;
++
+     if (!image)
+     {
+ 	iter->get_scanline = get_scanline_null;
+@@ -301,6 +309,14 @@ _pixman_implementation_dest_iter_init (pixman_implementation_t	*imp,
+ 				       uint8_t			*buffer,
+ 				       iter_flags_t		 flags)
+ {
++    iter->image = image;
++    iter->buffer = (uint32_t *)buffer;
++    iter->x = x;
++    iter->y = y;
++    iter->width = width;
++    iter->height = height;
++    iter->flags = flags;
++
+     (*imp->dest_iter_init) (
+ 	imp, iter, image, x, y, width, height, buffer, flags);
+ }
+diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
+index 1473dc4..ea9545f 100644
+--- a/pixman/pixman-private.h
++++ b/pixman/pixman-private.h
+@@ -212,14 +212,19 @@ typedef enum
+ 
+ struct pixman_iter_t
+ {
+-    pixman_iter_get_scanline_t	get_scanline;
+-    pixman_iter_write_back_t	write_back;
+-
++    /* These are initialized by _pixman_implementation_{src,dest}_init */
+     pixman_image_t *		image;
+     uint32_t *			buffer;
+     int				x, y;
+     int				width;
++    int				height;
++    iter_flags_t		flags;
++
++    /* These function pointers are initialized by the implementation */
++    pixman_iter_get_scanline_t	get_scanline;
++    pixman_iter_write_back_t	write_back;
+ 
++    /* These fields are scratch data that implementations can use */
+     uint8_t *			bits;
+     int				stride;
+ };
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index d4a34e9..43a6bf2 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -6004,8 +6004,6 @@ sse2_src_iter_init (pixman_implementation_t *imp,
+ 
+ 		iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ 		iter->stride = s;
+-		iter->width = width;
+-		iter->buffer = (uint32_t *)buffer;
+ 
+ 		iter->get_scanline = f->get_scanline;
+ 		return;
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0025-Simplify-the-prototype-for-iterator-initializers.patch b/recipes/xorg-lib/pixman-0.21.6/0025-Simplify-the-prototype-for-iterator-initializers.patch
new file mode 100644
index 0000000000..1bfd6b4e92
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0025-Simplify-the-prototype-for-iterator-initializers.patch
@@ -0,0 +1,442 @@
+From 6b27768d81c254a4f1d05473157328d5a5d99b9c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Sat, 12 Mar 2011 19:42:58 -0500
+Subject: [PATCH 25/40] Simplify the prototype for iterator initializers.
+
+All of the information previously passed to the iterator initializers
+is now available in the iterator itself, so there is no need to pass
+it as arguments anymore.
+---
+ pixman/pixman-bits-image.c       |   20 +++++---------
+ pixman/pixman-conical-gradient.c |    7 +---
+ pixman/pixman-general.c          |   52 ++++++++------------------------------
+ pixman/pixman-implementation.c   |   30 ++++-----------------
+ pixman/pixman-linear-gradient.c  |   16 +++--------
+ pixman/pixman-private.h          |   40 ++++++-----------------------
+ pixman/pixman-radial-gradient.c  |    7 +---
+ pixman/pixman-solid-fill.c       |   17 +++++-------
+ pixman/pixman-sse2.c             |   25 +++++++++--------
+ 9 files changed, 61 insertions(+), 153 deletions(-)
+
+diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
+index a865d71..835ecfb 100644
+--- a/pixman/pixman-bits-image.c
++++ b/pixman/pixman-bits-image.c
+@@ -1362,12 +1362,9 @@ src_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+ }
+ 
+ void
+-_pixman_bits_image_src_iter_init (pixman_image_t *image,
+-				  pixman_iter_t *iter,
+-				  int x, int y, int width, int height,
+-				  uint8_t *buffer, iter_flags_t flags)
++_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+-    if (flags & ITER_NARROW)
++    if (iter->flags & ITER_NARROW)
+ 	iter->get_scanline = src_get_scanline_narrow;
+     else
+ 	iter->get_scanline = src_get_scanline_wide;
+@@ -1472,28 +1469,25 @@ dest_write_back_direct (pixman_iter_t *iter)
+ }
+ 
+ void
+-_pixman_bits_image_dest_iter_init (pixman_image_t *image,
+-				   pixman_iter_t *iter,
+-				   int x, int y, int width, int height,
+-				   uint8_t *buffer, iter_flags_t flags)
++_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+-    if (flags & ITER_NARROW)
++    if (iter->flags & ITER_NARROW)
+     {
+ 	if (((image->common.flags &
+ 	      (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) ==
+ 	     (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) &&
+ 	    (image->bits.format == PIXMAN_a8r8g8b8	||
+ 	     (image->bits.format == PIXMAN_x8r8g8b8	&&
+-	      (flags & ITER_LOCALIZED_ALPHA))))
++	      (iter->flags & ITER_LOCALIZED_ALPHA))))
+ 	{
+-	    iter->buffer = image->bits.bits + y * image->bits.rowstride + x;
++	    iter->buffer = image->bits.bits + iter->y * image->bits.rowstride + iter->x;
+ 
+ 	    iter->get_scanline = _pixman_iter_get_scanline_noop;
+ 	    iter->write_back = dest_write_back_direct;
+ 	}
+ 	else
+ 	{
+-	    if ((flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
++	    if ((iter->flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+ 		(ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
+ 	    {
+ 		iter->get_scanline = _pixman_iter_get_scanline_noop;
+diff --git a/pixman/pixman-conical-gradient.c b/pixman/pixman-conical-gradient.c
+index 9d7d2e8..791d4f3 100644
+--- a/pixman/pixman-conical-gradient.c
++++ b/pixman/pixman-conical-gradient.c
+@@ -171,12 +171,9 @@ conical_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+ }
+ 
+ void
+-_pixman_conical_gradient_iter_init (pixman_image_t *image,
+-				    pixman_iter_t *iter,
+-				    int x, int y, int width, int height,
+-				    uint8_t *buffer, iter_flags_t flags)
++_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+-    if (flags & ITER_NARROW)
++    if (iter->flags & ITER_NARROW)
+ 	iter->get_scanline = conical_get_scanline_narrow;
+     else
+ 	iter->get_scanline = conical_get_scanline_wide;
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 1a0fa7c..727affc 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -38,60 +38,30 @@
+ #include "pixman-private.h"
+ 
+ static void
+-general_src_iter_init (pixman_implementation_t *imp,
+-		       pixman_iter_t *iter,
+-		       pixman_image_t *image,
+-		       int x, int y, int width, int height,
+-		       uint8_t *buffer, iter_flags_t flags)
++general_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+ {
++    pixman_image_t *image = iter->image;
++
+     if (image->type == SOLID)
+-    {
+-	_pixman_solid_fill_iter_init (
+-	    image, iter, x, y, width, height, buffer, flags);
+-    }
++	_pixman_solid_fill_iter_init (image, iter);
+     else if (image->type == LINEAR)
+-    {
+-	_pixman_linear_gradient_iter_init (
+-	    image, iter, x, y, width, height, buffer, flags);
+-    }
++	_pixman_linear_gradient_iter_init (image, iter);
+     else if (image->type == RADIAL)
+-    {
+-	_pixman_radial_gradient_iter_init (
+-	    image, iter, x, y, width, height, buffer, flags);
+-    }
++	_pixman_radial_gradient_iter_init (image, iter);
+     else if (image->type == CONICAL)
+-    {
+-	_pixman_conical_gradient_iter_init (
+-	    image, iter, x, y, width, height, buffer, flags);
+-    }
++	_pixman_conical_gradient_iter_init (image, iter);
+     else if (image->type == BITS)
+-    {
+-	_pixman_bits_image_src_iter_init (
+-	    image, iter, x, y, width, height, buffer, flags);
+-    }
++	_pixman_bits_image_src_iter_init (image, iter);
+     else
+-    {
+ 	_pixman_log_error (FUNC, "Pixman bug: unknown image type\n");
+-    }
+ }
+ 
+ static void
+-general_dest_iter_init (pixman_implementation_t *imp,
+-			pixman_iter_t *iter,
+-			pixman_image_t *image,
+-			int x, int y, int width, int height,
+-			uint8_t *buffer, iter_flags_t flags)
++general_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+ {
+-    iter->image = image;
+-    iter->x = x;
+-    iter->y = y;
+-    iter->width = width;
+-    iter->buffer = (uint32_t *)buffer;
+-
+-    if (image->type == BITS)
++    if (iter->image->type == BITS)
+     {
+-	_pixman_bits_image_dest_iter_init (
+-	    image, iter, x, y, width, height, buffer, flags);
++	_pixman_bits_image_dest_iter_init (iter->image, iter);
+     }
+     else
+     {
+diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
+index bdd4543..f1d3f99 100644
+--- a/pixman/pixman-implementation.c
++++ b/pixman/pixman-implementation.c
+@@ -113,32 +113,16 @@ delegate_fill (pixman_implementation_t *imp,
+ 
+ static void
+ delegate_src_iter_init (pixman_implementation_t *imp,
+-			pixman_iter_t *	         iter,
+-			pixman_image_t *         image,
+-			int                      x,
+-			int                      y,
+-			int                      width,
+-			int                      height,
+-			uint8_t *		 buffer,
+-			iter_flags_t             flags)
++			pixman_iter_t *	         iter)
+ {
+-    imp->delegate->src_iter_init (
+-	imp->delegate, iter, image, x, y, width, height, buffer, flags);
++    imp->delegate->src_iter_init (imp->delegate, iter);
+ }
+ 
+ static void
+ delegate_dest_iter_init (pixman_implementation_t *imp,
+-			 pixman_iter_t *	  iter,
+-			 pixman_image_t *         image,
+-			 int                      x,
+-			 int                      y,
+-			 int                      width,
+-			 int                      height,
+-			 uint8_t *		  buffer,
+-			 iter_flags_t             flags)
++			 pixman_iter_t *	  iter)
+ {
+-    imp->delegate->dest_iter_init (
+-	imp->delegate, iter, image, x, y, width, height, buffer, flags);
++    imp->delegate->dest_iter_init (imp->delegate, iter);
+ }
+ 
+ pixman_implementation_t *
+@@ -293,8 +277,7 @@ _pixman_implementation_src_iter_init (pixman_implementation_t	*imp,
+     }
+     else
+     {
+-	(*imp->src_iter_init) (
+-	    imp, iter, image, x, y, width, height, buffer, flags);
++	(*imp->src_iter_init) (imp, iter);
+     }
+ }
+ 
+@@ -317,6 +300,5 @@ _pixman_implementation_dest_iter_init (pixman_implementation_t	*imp,
+     iter->height = height;
+     iter->flags = flags;
+ 
+-    (*imp->dest_iter_init) (
+-	imp, iter, image, x, y, width, height, buffer, flags);
++    (*imp->dest_iter_init) (imp, iter);
+ }
+diff --git a/pixman/pixman-linear-gradient.c b/pixman/pixman-linear-gradient.c
+index 07303fc..6e1ea24 100644
+--- a/pixman/pixman-linear-gradient.c
++++ b/pixman/pixman-linear-gradient.c
+@@ -233,18 +233,12 @@ linear_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+ }
+ 
+ void
+-_pixman_linear_gradient_iter_init (pixman_image_t *image,
+-				   pixman_iter_t  *iter,
+-				   int             x,
+-				   int             y,
+-				   int             width,
+-				   int             height,
+-				   uint8_t        *buffer,
+-				   iter_flags_t    flags)
++_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t  *iter)
+ {
+-    if (linear_gradient_is_horizontal (image, x, y, width, height))
++    if (linear_gradient_is_horizontal (
++	    iter->image, iter->x, iter->y, iter->width, iter->height))
+     {
+-	if (flags & ITER_NARROW)
++	if (iter->flags & ITER_NARROW)
+ 	    linear_get_scanline_narrow (iter, NULL);
+ 	else
+ 	    linear_get_scanline_wide (iter, NULL);
+@@ -253,7 +247,7 @@ _pixman_linear_gradient_iter_init (pixman_image_t *image,
+     }
+     else
+     {
+-	if (flags & ITER_NARROW)
++	if (iter->flags & ITER_NARROW)
+ 	    iter->get_scanline = linear_get_scanline_narrow;
+ 	else
+ 	    iter->get_scanline = linear_get_scanline_wide;
+diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
+index ea9545f..60060a9 100644
+--- a/pixman/pixman-private.h
++++ b/pixman/pixman-private.h
+@@ -233,39 +233,22 @@ void
+ _pixman_bits_image_setup_accessors (bits_image_t *image);
+ 
+ void
+-_pixman_bits_image_src_iter_init (pixman_image_t *image,
+-				  pixman_iter_t *iter,
+-				  int x, int y, int width, int height,
+-				  uint8_t *buffer, iter_flags_t flags);
++_pixman_bits_image_src_iter_init (pixman_image_t *image, pixman_iter_t *iter);
++
+ void
+-_pixman_bits_image_dest_iter_init (pixman_image_t *image,
+-				   pixman_iter_t *iter,
+-				   int x, int y, int width, int height,
+-				   uint8_t *buffer, iter_flags_t flags);
++_pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+ 
+ void
+-_pixman_solid_fill_iter_init (pixman_image_t *image,
+-			      pixman_iter_t  *iter,
+-			      int x, int y, int width, int height,
+-			      uint8_t *buffer, iter_flags_t flags);
++_pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t  *iter);
+ 
+ void
+-_pixman_linear_gradient_iter_init (pixman_image_t *image,
+-				   pixman_iter_t  *iter,
+-				   int x, int y, int width, int height,
+-				   uint8_t *buffer, iter_flags_t flags);
++_pixman_linear_gradient_iter_init (pixman_image_t *image, pixman_iter_t  *iter);
+ 
+ void
+-_pixman_radial_gradient_iter_init (pixman_image_t *image,
+-				   pixman_iter_t *iter,
+-				   int x, int y, int width, int height,
+-				   uint8_t *buffer, iter_flags_t flags);
++_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+ 
+ void
+-_pixman_conical_gradient_iter_init (pixman_image_t *image,
+-				    pixman_iter_t *iter,
+-				    int x, int y, int width, int height,
+-				    uint8_t *buffer, iter_flags_t flags);
++_pixman_conical_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter);
+ 
+ pixman_image_t *
+ _pixman_image_allocate (void);
+@@ -413,14 +396,7 @@ typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
+ 					     int                      height,
+ 					     uint32_t                 xor);
+ typedef void (*pixman_iter_init_func_t) (pixman_implementation_t *imp,
+-                                         pixman_iter_t           *iter,
+-                                         pixman_image_t          *image,
+-                                         int                      x,
+-                                         int                      y,
+-                                         int                      width,
+-                                         int                      height,
+-                                         uint8_t                 *buffer,
+-                                         iter_flags_t             flags);
++                                         pixman_iter_t           *iter);
+ 
+ void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
+ void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
+diff --git a/pixman/pixman-radial-gradient.c b/pixman/pixman-radial-gradient.c
+index 6523b82..5e9fd73 100644
+--- a/pixman/pixman-radial-gradient.c
++++ b/pixman/pixman-radial-gradient.c
+@@ -400,12 +400,9 @@ radial_get_scanline_wide (pixman_iter_t *iter, const uint32_t *mask)
+ }
+ 
+ void
+-_pixman_radial_gradient_iter_init (pixman_image_t *image,
+-				   pixman_iter_t *iter,
+-				   int x, int y, int width, int height,
+-				   uint8_t *buffer, iter_flags_t flags)
++_pixman_radial_gradient_iter_init (pixman_image_t *image, pixman_iter_t *iter)
+ {
+-    if (flags & ITER_NARROW)
++    if (iter->flags & ITER_NARROW)
+ 	iter->get_scanline = radial_get_scanline_narrow;
+     else
+ 	iter->get_scanline = radial_get_scanline_wide;
+diff --git a/pixman/pixman-solid-fill.c b/pixman/pixman-solid-fill.c
+index 67681f2..852e135 100644
+--- a/pixman/pixman-solid-fill.c
++++ b/pixman/pixman-solid-fill.c
+@@ -27,24 +27,21 @@
+ #include "pixman-private.h"
+ 
+ void
+-_pixman_solid_fill_iter_init (pixman_image_t *image,
+-			      pixman_iter_t  *iter,
+-			      int x, int y, int width, int height,
+-			      uint8_t *buffer, iter_flags_t flags)
++_pixman_solid_fill_iter_init (pixman_image_t *image, pixman_iter_t  *iter)
+ {
+-    if (flags & ITER_NARROW)
++    if (iter->flags & ITER_NARROW)
+     {
+-	uint32_t *b = (uint32_t *)buffer;
+-	uint32_t *e = b + width;
+-	uint32_t color = image->solid.color_32;
++	uint32_t *b = (uint32_t *)iter->buffer;
++	uint32_t *e = b + iter->width;
++	uint32_t color = iter->image->solid.color_32;
+ 
+ 	while (b < e)
+ 	    *(b++) = color;
+     }
+     else
+     {
+-	uint64_t *b = (uint64_t *)buffer;
+-	uint64_t *e = b + width;
++	uint64_t *b = (uint64_t *)iter->buffer;
++	uint64_t *e = b + iter->width;
+ 	uint64_t color = image->solid.color_64;
+ 
+ 	while (b < e)
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 43a6bf2..533b858 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -5978,19 +5978,21 @@ static const fetcher_info_t fetchers[] =
+ };
+ 
+ static void
+-sse2_src_iter_init (pixman_implementation_t *imp,
+-		    pixman_iter_t *iter,
+-		    pixman_image_t *image,
+-		    int x, int y, int width, int height,
+-		    uint8_t *buffer, iter_flags_t flags)
++sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+ {
++    pixman_image_t *image = iter->image;
++    int x = iter->x;
++    int y = iter->y;
++    int width = iter->width;
++    int height = iter->height;
++
+ #define FLAGS								\
+     (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+ 
+-    if ((flags & ITER_NARROW)				&&
+-	(image->common.flags & FLAGS) == FLAGS		&&
+-	x >= 0 && y >= 0				&&
+-	x + width <= image->bits.width			&&
++    if ((iter->flags & ITER_NARROW)				&&
++	(image->common.flags & FLAGS) == FLAGS			&&
++	x >= 0 && y >= 0					&&
++	x + width <= image->bits.width				&&
+ 	y + height <= image->bits.height)
+     {
+ 	const fetcher_info_t *f;
+@@ -6002,7 +6004,7 @@ sse2_src_iter_init (pixman_implementation_t *imp,
+ 		uint8_t *b = (uint8_t *)image->bits.bits;
+ 		int s = image->bits.rowstride * 4;
+ 
+-		iter->bits = b + s * y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
++		iter->bits = b + s * iter->y + x * PIXMAN_FORMAT_BPP (f->format) / 8;
+ 		iter->stride = s;
+ 
+ 		iter->get_scanline = f->get_scanline;
+@@ -6011,8 +6013,7 @@ sse2_src_iter_init (pixman_implementation_t *imp,
+ 	}
+     }
+ 
+-    imp->delegate->src_iter_init (
+-	imp->delegate, iter, image, x, y, width, height, buffer, flags);
++    imp->delegate->src_iter_init (imp->delegate, iter);
+ }
+ 
+ #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0026-test-Randomize-some-tests-if-PIXMAN_RANDOMIZE_TESTS-.patch b/recipes/xorg-lib/pixman-0.21.6/0026-test-Randomize-some-tests-if-PIXMAN_RANDOMIZE_TESTS-.patch
new file mode 100644
index 0000000000..8fc5b7706f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0026-test-Randomize-some-tests-if-PIXMAN_RANDOMIZE_TESTS-.patch
@@ -0,0 +1,187 @@
+From 7eb0abb5e819046537b9f809c7ec332c6679c557 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Mon, 14 Mar 2011 14:56:22 -0400
+Subject: [PATCH 26/40] test: Randomize some tests if PIXMAN_RANDOMIZE_TESTS is set
+
+This patch makes it so that composite and stress-test will start from a
+random seed if the PIXMAN_RANDOMIZE_TESTS environment variable is
+set. Running the test suite in this mode is useful to get more test
+coverage.
+
+Also, in stress-test.c make it so that setting the initial seed causes
+threads to be turned off. This makes it much easier to see when
+something fails.
+---
+ test/composite.c   |   17 ++++++++++++-----
+ test/stress-test.c |   37 ++++++++++++++++++++++++++-----------
+ test/utils.c       |   10 ++++++++++
+ test/utils.h       |    3 +++
+ 4 files changed, 51 insertions(+), 16 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index a86e5ed..e6d52b9 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -868,7 +868,7 @@ main (int argc, char **argv)
+ {
+ #define N_TESTS (8 * 1024 * 1024)
+     int result = 0;
+-    int i;
++    uint32_t i;
+ 
+     if (argc > 1)
+     {
+@@ -890,15 +890,22 @@ main (int argc, char **argv)
+ 	}
+     }
+ 
++    uint32_t seed;
++    
++    if (getenv ("PIXMAN_RANDOMIZE_TESTS"))
++	seed = get_random_seed();
++    else
++	seed = 1;
++    
+ #ifdef USE_OPENMP
+-#   pragma omp parallel for default(none) shared(result) shared(argv) 
++#   pragma omp parallel for default(none) shared(result, argv, seed)
+ #endif
+-    for (i = 1; i <= N_TESTS; ++i)
++    for (i = seed; i <= N_TESTS; ++i)
+     {
+ 	if (!result && !run_test (i))
+ 	{
+-	    printf ("Test %d failed.\n", i);
+-
++	    printf ("Test 0x%08X failed.\n", i);
++	    
+ 	    result = i;
+ 	}
+     }
+diff --git a/test/stress-test.c b/test/stress-test.c
+index 166dc6d..d496f93 100644
+--- a/test/stress-test.c
++++ b/test/stress-test.c
+@@ -1,4 +1,6 @@
++#include <stdio.h>
+ #include "utils.h"
++#include <sys/types.h>
+ 
+ #if 0
+ #define fence_malloc malloc
+@@ -730,11 +732,17 @@ static const pixman_op_t op_list[] =
+ };
+ 
+ static void
+-run_test (uint32_t seed)
++run_test (uint32_t seed, pixman_bool_t verbose, uint32_t mod)
+ {
+     pixman_image_t *source, *mask, *dest;
+     pixman_op_t op;
+ 
++    if (verbose)
++    {
++	if (mod == 0 || (seed % mod) == 0)
++	    printf ("Seed 0x%08x\n", seed);
++    }
++	    
+     lcg_srand (seed);
+ 
+     source = create_random_image ();
+@@ -787,6 +795,7 @@ main (int argc, char **argv)
+     uint32_t seed = 1;
+     uint32_t n_tests = 0xffffffff;
+     uint32_t mod = 0;
++    pixman_bool_t use_threads = TRUE;
+     uint32_t i;
+ 
+     pixman_disable_out_of_bounds_workaround ();
+@@ -811,6 +820,7 @@ main (int argc, char **argv)
+ 	else if (strcmp (argv[i], "-s") == 0 && i + 1 < argc)
+ 	{
+ 	    get_int (argv[i + 1], &seed);
++	    use_threads = FALSE;
+ 	    i++;
+ 	}
+ 	else if (strcmp (argv[i], "-n") == 0 && i + 1 < argc)
+@@ -825,7 +835,7 @@ main (int argc, char **argv)
+ 
+ 	    printf ("Options:\n\n"
+ 		    "-n <number>        Number of tests to run\n"
+-		    "-s <seed> 	        Seed of first test\n"
++		    "-s <seed> 	        Seed of first test (ignored if PIXMAN_RANDOMIZE_TESTS is set)\n"
+ 		    "-v                 Print out seeds\n"
+ 		    "-v <n>             Print out every n'th seed\n\n");
+ 
+@@ -836,19 +846,24 @@ main (int argc, char **argv)
+     if (n_tests == 0xffffffff)
+ 	n_tests = 8000;
+ 
+-    /* FIXME: seed 2005763 fails in set_lum() with divide by zero */
++    if (getenv ("PIXMAN_RANDOMIZE_TESTS"))
++    {
++	seed = get_random_seed();
++	printf ("First seed: 0x%08x\n", seed);
++    }
++
++    if (use_threads)
++    {
+ #ifdef USE_OPENMP
+ #   pragma omp parallel for default(none) shared(verbose, n_tests, mod, seed)
+ #endif
+-    for (i = seed; i < seed + n_tests; ++i)
++	for (i = seed; i < seed + n_tests; ++i)
++	    run_test (i, verbose, mod);
++    }
++    else
+     {
+-	if (verbose)
+-	{
+-	    if (mod == 0 || (i % mod) == 0)
+-		printf ("Seed %d\n", i);
+-	}
+-
+-	run_test (i);
++	for (i = seed; i < seed + n_tests; ++i)
++	    run_test (i, verbose, mod);
+     }
+ 
+     return 0;
+diff --git a/test/utils.c b/test/utils.c
+index 4bf02e1..56701c4 100644
+--- a/test/utils.c
++++ b/test/utils.c
+@@ -455,6 +455,16 @@ gettime (void)
+ #endif
+ }
+ 
+/* Return a seed derived from the current time as reported by gettime(). */
+uint32_t
+get_random_seed (void)
+{
+    union { double d; uint32_t u32; } t; /* read the double's first 4 bytes without an aliasing pointer cast */
+    t.d = gettime();
+    lcg_srand (t.u32);
+    return lcg_rand_u32 ();
+}
++
+ static const char *global_msg;
+ 
+ static void
+diff --git a/test/utils.h b/test/utils.h
+index a5183f7..615ad78 100644
+--- a/test/utils.h
++++ b/test/utils.h
+@@ -79,6 +79,9 @@ make_random_bytes (int n_bytes);
+ double
+ gettime (void);
+ 
++uint32_t
++get_random_seed (void);
++
+ /* main body of the fuzzer test */
+ int
+ fuzzer_test_main (const char *test_name,
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0027-Add-simple-support-for-the-r8g8b8a8-and-r8g8b8x8-for.patch b/recipes/xorg-lib/pixman-0.21.6/0027-Add-simple-support-for-the-r8g8b8a8-and-r8g8b8x8-for.patch
new file mode 100644
index 0000000000..1dbac60586
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0027-Add-simple-support-for-the-r8g8b8a8-and-r8g8b8x8-for.patch
@@ -0,0 +1,206 @@
+From f05a90e5f8d1d0af60e2c684cbe9f1327c33135a Mon Sep 17 00:00:00 2001
+From: Alexandros Frantzis <alexandros.frantzis@linaro.org>
+Date: Fri, 18 Mar 2011 14:36:15 +0200
+Subject: [PATCH 27/40] Add simple support for the r8g8b8a8 and r8g8b8x8 formats.
+
+This format is particularly useful on big-endian architectures, where RGBA in
+memory/file order corresponds to r8g8b8a8 as a uint32_t. This is important
+because RGBA is in some cases the only available choice (for example as a pixel
+format in OpenGL ES 2.0).
+---
+ pixman/pixman-access.c |   97 ++++++++++++++++++++++++++++++++++++++++++++++++
+ pixman/pixman.c        |    6 +++
+ pixman/pixman.h        |    6 ++-
+ 3 files changed, 108 insertions(+), 1 deletions(-)
+
+diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
+index f1ce0ba..32c4d8b 100644
+--- a/pixman/pixman-access.c
++++ b/pixman/pixman-access.c
+@@ -211,6 +211,46 @@ fetch_scanline_b8g8r8x8 (pixman_image_t *image,
+ }
+ 
+ static void
++fetch_scanline_r8g8b8a8 (pixman_image_t *image,
++                         int             x,
++                         int             y,
++                         int             width,
++                         uint32_t *      buffer,
++                         const uint32_t *mask)
++{
++    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
++    const uint32_t *pixel = (uint32_t *)bits + x;
++    const uint32_t *end = pixel + width;
++
++    while (pixel < end)
++    {
++	uint32_t p = READ (image, pixel++);
++
++	*buffer++ = (((p & 0x000000ff) << 24) | (p >> 8));
++    }
++}
++
++static void
++fetch_scanline_r8g8b8x8 (pixman_image_t *image,
++                         int             x,
++                         int             y,
++                         int             width,
++                         uint32_t *      buffer,
++                         const uint32_t *mask)
++{
++    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
++    const uint32_t *pixel = (uint32_t *)bits + x;
++    const uint32_t *end = pixel + width;
++    
++    while (pixel < end)
++    {
++	uint32_t p = READ (image, pixel++);
++	
++	*buffer++ = (0xff000000 | (p >> 8));
++    }
++}
++
++static void
+ fetch_scanline_x14r6g6b6 (pixman_image_t *image,
+                           int             x,
+                           int             y,
+@@ -1292,6 +1332,28 @@ fetch_pixel_b8g8r8x8 (bits_image_t *image,
+ }
+ 
+ static uint32_t
++fetch_pixel_r8g8b8a8 (bits_image_t *image,
++		      int           offset,
++		      int           line)
++{
++    uint32_t *bits = image->bits + line * image->rowstride;
++    uint32_t pixel = READ (image, (uint32_t *)bits + offset);
++    
++    return (((pixel & 0x000000ff) << 24) | (pixel >> 8));
++}
++
++static uint32_t
++fetch_pixel_r8g8b8x8 (bits_image_t *image,
++		      int           offset,
++		      int           line)
++{
++    uint32_t *bits = image->bits + line * image->rowstride;
++    uint32_t pixel = READ (image, (uint32_t *)bits + offset);
++    
++    return (0xff000000 | (pixel >> 8));
++}
++
++static uint32_t
+ fetch_pixel_x14r6g6b6 (bits_image_t *image,
+                        int           offset,
+                        int           line)
+@@ -2028,6 +2090,39 @@ store_scanline_b8g8r8x8 (bits_image_t *  image,
+ }
+ 
+ static void
++store_scanline_r8g8b8a8 (bits_image_t *  image,
++                         int             x,
++                         int             y,
++                         int             width,
++                         const uint32_t *values)
++{
++    uint32_t *bits = image->bits + image->rowstride * y;
++    uint32_t *pixel = (uint32_t *)bits + x;
++    int i;
++    
++    for (i = 0; i < width; ++i)
++    {
++	WRITE (image, pixel++,
++	       ((values[i] >> 24) & 0x000000ff) | (values[i] << 8));
++    }
++}
++
++static void
++store_scanline_r8g8b8x8 (bits_image_t *  image,
++                         int             x,
++                         int             y,
++                         int             width,
++                         const uint32_t *values)
++{
++    uint32_t *bits = image->bits + image->rowstride * y;
++    uint32_t *pixel = (uint32_t *)bits + x;
++    int i;
++    
++    for (i = 0; i < width; ++i)
++	WRITE (image, pixel++, (values[i] << 8));
++}
++
++static void
+ store_scanline_x14r6g6b6 (bits_image_t *  image,
+                           int             x,
+                           int             y,
+@@ -2845,6 +2940,8 @@ static const format_info_t accessors[] =
+     FORMAT_INFO (x8b8g8r8),
+     FORMAT_INFO (b8g8r8a8),
+     FORMAT_INFO (b8g8r8x8),
++    FORMAT_INFO (r8g8b8a8),
++    FORMAT_INFO (r8g8b8x8),
+     FORMAT_INFO (x14r6g6b6),
+ 
+ /* 24bpp formats */
+diff --git a/pixman/pixman.c b/pixman/pixman.c
+index ec565f9..f21af2f 100644
+--- a/pixman/pixman.c
++++ b/pixman/pixman.c
+@@ -873,6 +873,8 @@ color_to_pixel (pixman_color_t *     color,
+           format == PIXMAN_x8b8g8r8     ||
+           format == PIXMAN_b8g8r8a8     ||
+           format == PIXMAN_b8g8r8x8     ||
++          format == PIXMAN_r8g8b8a8     ||
++          format == PIXMAN_r8g8b8x8     ||
+           format == PIXMAN_r5g6b5       ||
+           format == PIXMAN_b5g6r5       ||
+           format == PIXMAN_a8           ||
+@@ -895,6 +897,8 @@ color_to_pixel (pixman_color_t *     color,
+ 	    ((c & 0x0000ff00) <<  8) |
+ 	    ((c & 0x000000ff) << 24);
+     }
++    if (PIXMAN_FORMAT_TYPE (format) == PIXMAN_TYPE_RGBA)
++	c = ((c & 0xff000000) >> 24) | (c << 8);
+ 
+     if (format == PIXMAN_a1)
+ 	c = c >> 31;
+@@ -1105,6 +1109,8 @@ pixman_format_supported_source (pixman_format_code_t format)
+     case PIXMAN_x8b8g8r8:
+     case PIXMAN_b8g8r8a8:
+     case PIXMAN_b8g8r8x8:
++    case PIXMAN_r8g8b8a8:
++    case PIXMAN_r8g8b8x8:
+     case PIXMAN_r8g8b8:
+     case PIXMAN_b8g8r8:
+     case PIXMAN_r5g6b5:
+diff --git a/pixman/pixman.h b/pixman/pixman.h
+index 1305bc1..59d0760 100644
+--- a/pixman/pixman.h
++++ b/pixman/pixman.h
+@@ -650,11 +650,13 @@ struct pixman_indexed
+ #define PIXMAN_TYPE_YUY2	6
+ #define PIXMAN_TYPE_YV12	7
+ #define PIXMAN_TYPE_BGRA	8
++#define PIXMAN_TYPE_RGBA	9
+ 
+ #define PIXMAN_FORMAT_COLOR(f)				\
+ 	(PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ARGB ||	\
+ 	 PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_ABGR ||	\
+-	 PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA)
++	 PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_BGRA ||	\
++	 PIXMAN_FORMAT_TYPE(f) == PIXMAN_TYPE_RGBA)
+ 
+ /* 32bpp formats */
+ typedef enum {
+@@ -664,6 +666,8 @@ typedef enum {
+     PIXMAN_x8b8g8r8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
+     PIXMAN_b8g8r8a8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
+     PIXMAN_b8g8r8x8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
++    PIXMAN_r8g8b8a8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,8,8,8,8),
++    PIXMAN_r8g8b8x8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_RGBA,0,8,8,8),
+     PIXMAN_x14r6g6b6 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,6,6,6),
+     PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
+     PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0028-Add-support-for-the-r8g8b8a8-and-r8g8b8x8-formats-to.patch b/recipes/xorg-lib/pixman-0.21.6/0028-Add-support-for-the-r8g8b8a8-and-r8g8b8x8-formats-to.patch
new file mode 100644
index 0000000000..7809e2ae4a
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0028-Add-support-for-the-r8g8b8a8-and-r8g8b8x8-formats-to.patch
@@ -0,0 +1,110 @@
+From b514e63cfc58af21f7097db5a1b04292a758782a Mon Sep 17 00:00:00 2001
+From: Alexandros Frantzis <alexandros.frantzis@linaro.org>
+Date: Fri, 18 Mar 2011 14:37:27 +0200
+Subject: [PATCH 28/40] Add support for the r8g8b8a8 and r8g8b8x8 formats to the tests.
+
+---
+ test/blitters-test.c |    9 +++++++--
+ test/composite.c     |    9 +++++++++
+ test/fetch-test.c    |   10 ++++++++++
+ test/stress-test.c   |    2 ++
+ 4 files changed, 28 insertions(+), 2 deletions(-)
+
+diff --git a/test/blitters-test.c b/test/blitters-test.c
+index 63e7cb3..3ecfb09 100644
+--- a/test/blitters-test.c
++++ b/test/blitters-test.c
+@@ -88,8 +88,11 @@ free_random_image (uint32_t initcrc,
+ 	    uint32_t *data = pixman_image_get_data (img);
+ 	    uint32_t mask = (1 << PIXMAN_FORMAT_DEPTH (fmt)) - 1;
+ 
+-	    if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_BGRA)
++	    if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_BGRA ||
++		PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_RGBA)
++	    {
+ 		mask <<= (PIXMAN_FORMAT_BPP (fmt) - PIXMAN_FORMAT_DEPTH (fmt));
++	    }
+ 
+ 	    for (i = 0; i < 32; i++)
+ 		mask |= mask << (i * PIXMAN_FORMAT_BPP (fmt));
+@@ -182,6 +185,8 @@ static pixman_format_code_t img_fmt_list[] = {
+     PIXMAN_x8b8g8r8,
+     PIXMAN_b8g8r8a8,
+     PIXMAN_b8g8r8x8,
++    PIXMAN_r8g8b8a8,
++    PIXMAN_r8g8b8x8,
+     PIXMAN_x14r6g6b6,
+     PIXMAN_r8g8b8,
+     PIXMAN_b8g8r8,
+@@ -412,6 +417,6 @@ main (int argc, const char *argv[])
+     }
+ 
+     return fuzzer_test_main("blitters", 2000000,
+-			    0x1DB8BDF8,
++			    0x265CDFEB,
+ 			    test_composite, argc, argv);
+ }
+diff --git a/test/composite.c b/test/composite.c
+index e6d52b9..b0e0ba4 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -102,6 +102,8 @@ static const format_t formats[] =
+     P(x8b8g8r8),
+     P(b8g8r8a8),
+     P(b8g8r8x8),
++    P(r8g8b8a8),
++    P(r8g8b8x8),
+     P(x2r10g10b10),
+     P(x2b10g10r10),
+     P(a2r10g10b10),
+@@ -556,6 +558,13 @@ get_pixel (pixman_image_t *image,
+         bs = g + gs;
+ 	break;
+ 
++    case PIXMAN_TYPE_RGBA:
++	as = 0;
++	bs = PIXMAN_FORMAT_BPP (format) - (b + g + r);
++	gs = b + bs;
++	rs = g + gs;
++	break;
++
+     case PIXMAN_TYPE_A:
+         as = 0;
+         rs = 0;
+diff --git a/test/fetch-test.c b/test/fetch-test.c
+index 60bc765..feb98d9 100644
+--- a/test/fetch-test.c
++++ b/test/fetch-test.c
+@@ -34,6 +34,16 @@ static testcase_t testcases[] =
+ 	NULL,
+     },
+     {
++	PIXMAN_r8g8b8a8,
++	2, 2,
++	8,
++	{ 0x11223300, 0x55667744,
++	  0x99aabb88, 0xddeeffcc },
++	{ 0x00112233, 0x44556677,
++	  0x8899aabb, 0xccddeeff },
++	NULL,
++    },
++    {
+ 	PIXMAN_g1,
+ 	8, 2,
+ 	4,
+diff --git a/test/stress-test.c b/test/stress-test.c
+index d496f93..571420a 100644
+--- a/test/stress-test.c
++++ b/test/stress-test.c
+@@ -19,6 +19,8 @@ static const pixman_format_code_t image_formats[] =
+     PIXMAN_x8b8g8r8,
+     PIXMAN_b8g8r8a8,
+     PIXMAN_b8g8r8x8,
++    PIXMAN_r8g8b8a8,
++    PIXMAN_r8g8b8x8,
+     PIXMAN_x14r6g6b6,
+     PIXMAN_r8g8b8,
+     PIXMAN_b8g8r8,
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0029-test-Fix-infinite-loop-in-composite.patch b/recipes/xorg-lib/pixman-0.21.6/0029-test-Fix-infinite-loop-in-composite.patch
new file mode 100644
index 0000000000..d9e4a380ee
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0029-test-Fix-infinite-loop-in-composite.patch
@@ -0,0 +1,37 @@
+From ad3cbfb073fc325e1b3152898ca71b8255675957 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?S=C3=B8ren=20Sandmann=20Pedersen?= <ssp@redhat.com>
+Date: Tue, 22 Mar 2011 13:42:05 -0400
+Subject: [PATCH 29/40] test: Fix infinite loop in composite
+
+When run in PIXMAN_RANDOMIZE_TESTS mode, this test would go into an
+infinite loop because the loop started at 'seed' but the stop
+condition was still N_TESTS.
+---
+ test/composite.c |    8 ++++----
+ 1 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/test/composite.c b/test/composite.c
+index b0e0ba4..9a001e5 100644
+--- a/test/composite.c
++++ b/test/composite.c
+@@ -909,13 +909,13 @@ main (int argc, char **argv)
+ #ifdef USE_OPENMP
+ #   pragma omp parallel for default(none) shared(result, argv, seed)
+ #endif
+-    for (i = seed; i <= N_TESTS; ++i)
++    for (i = 0; i <= N_TESTS; ++i)
+     {
+-	if (!result && !run_test (i))
++	if (!result && !run_test (i + seed))
+ 	{
+-	    printf ("Test 0x%08X failed.\n", i);
++	    printf ("Test 0x%08X failed.\n", seed + i);
+ 	    
+-	    result = i;
++	    result = seed + i;
+ 	}
+     }
+     
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0030-ARM-tweaked-horizontal-weights-update-in-NEON-biline.patch b/recipes/xorg-lib/pixman-0.21.6/0030-ARM-tweaked-horizontal-weights-update-in-NEON-biline.patch
new file mode 100644
index 0000000000..831065cb3e
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0030-ARM-tweaked-horizontal-weights-update-in-NEON-biline.patch
@@ -0,0 +1,82 @@
+From 4a0ade2a1e96fe3f1bca8953be221af0b2908925 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 10 Mar 2011 15:34:10 +0200
+Subject: [PATCH 30/40] ARM: tweaked horizontal weights update in NEON bilinear scaling code
+
+Moving the horizontal interpolation weight update instructions from the
+beginning of the loop to its end hides some pipeline stalls and
+improves performance.
+---
+ pixman/pixman-arm-neon-asm.S |   20 +++++++++++---------
+ 1 files changed, 11 insertions(+), 9 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 71b30ac..8788e95 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2558,8 +2558,7 @@ fname:
+     bilinear_load_&src_fmt d0, d1, d2
+     vmull.u8  q1, d0, d28
+     vmlal.u8  q1, d1, d29
+-    vshr.u16  d30, d24, #8
+-    /* 4 cycles bubble */
++    /* 5 cycles bubble */
+     vshll.u16 q0, d2, #8
+     vmlsl.u16 q0, d2, d30
+     vmlal.u16 q0, d3, d30
+@@ -2574,17 +2573,17 @@ fname:
+ .macro bilinear_interpolate_two_pixels src_fmt, dst_fmt
+     bilinear_load_and_vertical_interpolate_two_&src_fmt \
+                 q1, q11, d0, d1, d20, d21, d22, d23
+-    vshr.u16  q15, q12, #8
+-    vadd.u16  q12, q12, q13
+     vshll.u16 q0, d2, #8
+     vmlsl.u16 q0, d2, d30
+     vmlal.u16 q0, d3, d30
+     vshll.u16 q10, d22, #8
+     vmlsl.u16 q10, d22, d31
+     vmlal.u16 q10, d23, d31
+-    vshrn.u32 d30, q0, #16
+-    vshrn.u32 d31, q10, #16
+-    vmovn.u16 d0, q15
++    vshrn.u32 d0, q0, #16
++    vshrn.u32 d1, q10, #16
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    vmovn.u16 d0, q0
+     bilinear_store_&dst_fmt 2, q2, q3
+ .endm
+ 
+@@ -2593,8 +2592,6 @@ fname:
+                 q1, q11, d0, d1, d20, d21, d22, d23 \
+                 q3, q9,  d4, d5, d16, d17, d18, d19
+     pld       [TMP1, PF_OFFS]
+-    vshr.u16  q15, q12, #8
+-    vadd.u16  q12, q12, q13
+     vshll.u16 q0, d2, #8
+     vmlsl.u16 q0, d2, d30
+     vmlal.u16 q0, d3, d30
+@@ -2614,8 +2611,10 @@ fname:
+     vshrn.u32 d1, q10, #16
+     vshrn.u32 d4, q2, #16
+     vshrn.u32 d5, q8, #16
++    vshr.u16  q15, q12, #8
+     vmovn.u16 d0, q0
+     vmovn.u16 d1, q2
++    vadd.u16  q12, q12, q13
+     bilinear_store_&dst_fmt 4, q2, q3
+ .endm
+ 
+@@ -2669,6 +2668,9 @@ pixman_asm_function fname
+     vadd.u16  d25, d25, d26
+     vadd.u16  q13, q13, q13
+ 
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++
+     subs      WIDTH, WIDTH, #4
+     blt       1f
+     mov       PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0031-ARM-use-aligned-memory-writes-in-NEON-bilinear-scali.patch b/recipes/xorg-lib/pixman-0.21.6/0031-ARM-use-aligned-memory-writes-in-NEON-bilinear-scali.patch
new file mode 100644
index 0000000000..3c8394b983
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0031-ARM-use-aligned-memory-writes-in-NEON-bilinear-scali.patch
@@ -0,0 +1,124 @@
+From f36c189475951276766b2653ae9628c4d02dc0c9 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 10 Mar 2011 16:12:23 +0200
+Subject: [PATCH 31/40] ARM: use aligned memory writes in NEON bilinear scaling code
+
+---
+ pixman/pixman-arm-neon-asm.S |   49 ++++++++++++++++++++++++++++++------------
+ 1 files changed, 35 insertions(+), 14 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 8788e95..a4d6a9a 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2527,9 +2527,9 @@ fname:
+ 
+ .macro bilinear_store_8888 numpix, tmp1, tmp2
+ .if numpix == 4
+-    vst1.32   {d0, d1}, [OUT]!
++    vst1.32   {d0, d1}, [OUT, :128]!
+ .elseif numpix == 2
+-    vst1.32   {d0}, [OUT]!
++    vst1.32   {d0}, [OUT, :64]!
+ .elseif numpix == 1
+     vst1.32   {d0[0]}, [OUT, :32]!
+ .else
+@@ -2544,11 +2544,11 @@ fname:
+     vuzp.u8 d0, d2
+     convert_8888_to_0565 d2, d1, d0, q1, tmp1, tmp2
+ .if numpix == 4
+-    vst1.16   {d2}, [OUT]!
++    vst1.16   {d2}, [OUT, :64]!
+ .elseif numpix == 2
+-    vst1.32   {d2[0]}, [OUT]!
++    vst1.32   {d2[0]}, [OUT, :32]!
+ .elseif numpix == 1
+-    vst1.16   {d2[0]}, [OUT]!
++    vst1.16   {d2[0]}, [OUT, :16]!
+ .else
+     .error bilinear_store_0565 numpix is unsupported
+ .endif
+@@ -2622,8 +2622,7 @@ fname:
+  * Main template macro for generating NEON optimized bilinear scanline
+  * functions.
+  *
+- * TODO: use software pipelining and aligned writes to the destination buffer
+- *       in order to improve performance
++ * TODO: use software pipelining in order to improve performance
+  *
+  * Bilinear scanline scaler macro template uses the following arguments:
+  *  fname             - name of the function to generate
+@@ -2635,7 +2634,8 @@ fname:
+  */
+ 
+ .macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
+-                                       bpp_shift, prefetch_distance
++                                       src_bpp_shift, dst_bpp_shift, \
++                                       prefetch_distance
+ 
+ pixman_asm_function fname
+     OUT       .req      r0
+@@ -2666,19 +2666,40 @@ pixman_asm_function fname
+     vdup.u8   d28, WT
+     vdup.u8   d29, WB
+     vadd.u16  d25, d25, d26
+-    vadd.u16  q13, q13, q13
+ 
++    /* ensure good destination alignment  */
++    cmp       WIDTH, #1
++    blt       0f
++    tst       OUT, #(1 << dst_bpp_shift)
++    beq       0f
++    vshr.u16  q15, q12, #8
++    vadd.u16  q12, q12, q13
++    bilinear_interpolate_last_pixel src_fmt, dst_fmt
++    sub       WIDTH, WIDTH, #1
++0:
++    vadd.u16  q13, q13, q13
+     vshr.u16  q15, q12, #8
+     vadd.u16  q12, q12, q13
+ 
++    cmp       WIDTH, #2
++    blt       0f
++    tst       OUT, #(1 << (dst_bpp_shift + 1))
++    beq       0f
++    bilinear_interpolate_two_pixels src_fmt, dst_fmt
++    sub       WIDTH, WIDTH, #2
++0:
++
++    /* start the main loop */
+     subs      WIDTH, WIDTH, #4
+     blt       1f
+-    mov       PF_OFFS, PF_OFFS, asr #(16 - bpp_shift)
++    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+ 0:
+     bilinear_interpolate_four_pixels src_fmt, dst_fmt
+     subs      WIDTH, WIDTH, #4
+     bge       0b
+ 1:
++
++    /* handle the remaining trailing pixels */
+     tst       WIDTH, #2
+     beq       2f
+     bilinear_interpolate_two_pixels src_fmt, dst_fmt
+@@ -2708,13 +2729,13 @@ pixman_asm_function fname
+ .endm
+ 
+ generate_bilinear_scanline_func \
+-    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 28
++    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 2, 28
+ 
+ generate_bilinear_scanline_func \
+-    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 28
++    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 1, 28
+ 
+ generate_bilinear_scanline_func \
+-    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 28
++    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 2, 28
+ 
+ generate_bilinear_scanline_func \
+-    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 28
++    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 1, 28
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0032-ARM-support-for-software-pipelining-in-bilinear-macr.patch b/recipes/xorg-lib/pixman-0.21.6/0032-ARM-support-for-software-pipelining-in-bilinear-macr.patch
new file mode 100644
index 0000000000..c67f9c638f
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0032-ARM-support-for-software-pipelining-in-bilinear-macr.patch
@@ -0,0 +1,70 @@
+From 6d296598575b8307262fac2cf438d7cc832d09d3 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 16 Mar 2011 16:33:41 +0200
+Subject: [PATCH 32/40] ARM: support for software pipelining in bilinear macros
+
+Now it's possible to override the main loop of the bilinear scaling code
+with an optimized, pipelined implementation.
+---
+ pixman/pixman-arm-neon-asm.S |   31 ++++++++++++++++++++++++++++---
+ 1 files changed, 28 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index a4d6a9a..d84f2cc 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2618,12 +2618,32 @@ fname:
+     bilinear_store_&dst_fmt 4, q2, q3
+ .endm
+ 
++.macro bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
++    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_head
++.else
++    bilinear_interpolate_four_pixels src_fmt, dst_fmt
++.endif
++.endm
++
++.macro bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
++    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail
++.endif
++.endm
++
++.macro bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt
++    bilinear_interpolate_four_pixels_&src_fmt&_&dst_fmt&_tail_head
++.else
++    bilinear_interpolate_four_pixels src_fmt, dst_fmt
++.endif
++.endm
++
+ /*
+  * Main template macro for generating NEON optimized bilinear scanline
+  * functions.
+  *
+- * TODO: use software pipelining in order to improve performance
+- *
+  * Bilinear scanline scaler macro template uses the following arguments:
+  *  fname             - name of the function to generate
+  *  src_fmt           - source color format (8888 or 0565)
+@@ -2693,10 +2713,15 @@ pixman_asm_function fname
+     subs      WIDTH, WIDTH, #4
+     blt       1f
+     mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
++    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
++    subs      WIDTH, WIDTH, #4
++    blt       5f
+ 0:
+-    bilinear_interpolate_four_pixels src_fmt, dst_fmt
++    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
+     subs      WIDTH, WIDTH, #4
+     bge       0b
++5:
++    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+ 1:
+ 
+     /* handle the remaining trailing pixels */
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0033-ARM-use-less-ARM-instructions-in-NEON-bilinear-scali.patch b/recipes/xorg-lib/pixman-0.21.6/0033-ARM-use-less-ARM-instructions-in-NEON-bilinear-scali.patch
new file mode 100644
index 0000000000..1d66979f99
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0033-ARM-use-less-ARM-instructions-in-NEON-bilinear-scali.patch
@@ -0,0 +1,168 @@
+From ec2da8e651767421a8403bf0810445fdec1315ba Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Mar 2011 18:41:53 +0200
+Subject: [PATCH 33/40] ARM: use less ARM instructions in NEON bilinear scaling code
+
+This reduces code size and also puts less pressure on the
+instruction decoder.
+---
+ pixman/pixman-arm-neon-asm.S |   79 ++++++++++++++++++++----------------------
+ 1 files changed, 38 insertions(+), 41 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index d84f2cc..9878bf7 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2412,21 +2412,19 @@ fname:
+  */
+ 
+ .macro bilinear_load_8888 reg1, reg2, tmp
+-    mov       TMP2, X, asr #16
++    mov       TMP1, X, asr #16
+     add       X, X, UX
+-    add       TMP1, TOP, TMP2, asl #2
+-    add       TMP2, BOTTOM, TMP2, asl #2
+-    vld1.32   {reg1}, [TMP1]
+-    vld1.32   {reg2}, [TMP2]
++    add       TMP1, TOP, TMP1, asl #2
++    vld1.32   {reg1}, [TMP1], STRIDE
++    vld1.32   {reg2}, [TMP1]
+ .endm
+ 
+ .macro bilinear_load_0565 reg1, reg2, tmp
+-    mov       TMP2, X, asr #16
++    mov       TMP1, X, asr #16
+     add       X, X, UX
+-    add       TMP1, TOP, TMP2, asl #1
+-    add       TMP2, BOTTOM, TMP2, asl #1
+-    vld1.32   {reg2[0]}, [TMP1]
+-    vld1.32   {reg2[1]}, [TMP2]
++    add       TMP1, TOP, TMP1, asl #1
++    vld1.32   {reg2[0]}, [TMP1], STRIDE
++    vld1.32   {reg2[1]}, [TMP1]
+     convert_four_0565_to_x888_packed reg2, reg1, reg2, tmp
+ .endm
+ 
+@@ -2454,18 +2452,16 @@ fname:
+ .macro bilinear_load_and_vertical_interpolate_two_0565 \
+                 acc1, acc2, reg1, reg2, reg3, reg4, acc2lo, acc2hi
+ 
+-    mov       TMP2, X, asr #16
++    mov       TMP1, X, asr #16
+     add       X, X, UX
+-    mov       TMP4, X, asr #16
++    add       TMP1, TOP, TMP1, asl #1
++    mov       TMP2, X, asr #16
+     add       X, X, UX
+-    add       TMP1, TOP, TMP2, asl #1
+-    add       TMP2, BOTTOM, TMP2, asl #1
+-    add       TMP3, TOP, TMP4, asl #1
+-    add       TMP4, BOTTOM, TMP4, asl #1
+-    vld1.32   {acc2lo[0]}, [TMP1]
+-    vld1.32   {acc2hi[0]}, [TMP3]
+-    vld1.32   {acc2lo[1]}, [TMP2]
+-    vld1.32   {acc2hi[1]}, [TMP4]
++    add       TMP2, TOP, TMP2, asl #1
++    vld1.32   {acc2lo[0]}, [TMP1], STRIDE
++    vld1.32   {acc2hi[0]}, [TMP2], STRIDE
++    vld1.32   {acc2lo[1]}, [TMP1]
++    vld1.32   {acc2hi[1]}, [TMP2]
+     convert_0565_to_x888 acc2, reg3, reg2, reg1
+     vzip.u8   reg1, reg3
+     vzip.u8   reg2, reg4
+@@ -2481,34 +2477,30 @@ fname:
+                 xacc1, xacc2, xreg1, xreg2, xreg3, xreg4, xacc2lo, xacc2hi \
+                 yacc1, yacc2, yreg1, yreg2, yreg3, yreg4, yacc2lo, yacc2hi
+ 
+-    mov       TMP2, X, asr #16
++    mov       TMP1, X, asr #16
+     add       X, X, UX
+-    mov       TMP4, X, asr #16
++    add       TMP1, TOP, TMP1, asl #1
++    mov       TMP2, X, asr #16
+     add       X, X, UX
+-    add       TMP1, TOP, TMP2, asl #1
+-    add       TMP2, BOTTOM, TMP2, asl #1
+-    add       TMP3, TOP, TMP4, asl #1
+-    add       TMP4, BOTTOM, TMP4, asl #1
+-    vld1.32   {xacc2lo[0]}, [TMP1]
+-    vld1.32   {xacc2hi[0]}, [TMP3]
+-    vld1.32   {xacc2lo[1]}, [TMP2]
+-    vld1.32   {xacc2hi[1]}, [TMP4]
++    add       TMP2, TOP, TMP2, asl #1
++    vld1.32   {xacc2lo[0]}, [TMP1], STRIDE
++    vld1.32   {xacc2hi[0]}, [TMP2], STRIDE
++    vld1.32   {xacc2lo[1]}, [TMP1]
++    vld1.32   {xacc2hi[1]}, [TMP2]
+     convert_0565_to_x888 xacc2, xreg3, xreg2, xreg1
+-    mov       TMP2, X, asr #16
++    mov       TMP1, X, asr #16
+     add       X, X, UX
+-    mov       TMP4, X, asr #16
++    add       TMP1, TOP, TMP1, asl #1
++    mov       TMP2, X, asr #16
+     add       X, X, UX
+-    add       TMP1, TOP, TMP2, asl #1
+-    add       TMP2, BOTTOM, TMP2, asl #1
+-    add       TMP3, TOP, TMP4, asl #1
+-    add       TMP4, BOTTOM, TMP4, asl #1
+-    vld1.32   {yacc2lo[0]}, [TMP1]
++    add       TMP2, TOP, TMP2, asl #1
++    vld1.32   {yacc2lo[0]}, [TMP1], STRIDE
+     vzip.u8   xreg1, xreg3
+-    vld1.32   {yacc2hi[0]}, [TMP3]
++    vld1.32   {yacc2hi[0]}, [TMP2], STRIDE
+     vzip.u8   xreg2, xreg4
+-    vld1.32   {yacc2lo[1]}, [TMP2]
++    vld1.32   {yacc2lo[1]}, [TMP1]
+     vzip.u8   xreg3, xreg4
+-    vld1.32   {yacc2hi[1]}, [TMP4]
++    vld1.32   {yacc2hi[1]}, [TMP2]
+     vzip.u8   xreg1, xreg2
+     convert_0565_to_x888 yacc2, yreg3, yreg2, yreg1
+     vmull.u8  xacc1, xreg1, d28
+@@ -2592,6 +2584,7 @@ fname:
+                 q1, q11, d0, d1, d20, d21, d22, d23 \
+                 q3, q9,  d4, d5, d16, d17, d18, d19
+     pld       [TMP1, PF_OFFS]
++    sub       TMP1, TMP1, STRIDE
+     vshll.u16 q0, d2, #8
+     vmlsl.u16 q0, d2, d30
+     vmlal.u16 q0, d3, d30
+@@ -2671,6 +2664,7 @@ pixman_asm_function fname
+     PF_OFFS   .req      r7
+     TMP3      .req      r8
+     TMP4      .req      r9
++    STRIDE    .req      r2
+ 
+     mov       ip, sp
+     push      {r4, r5, r6, r7, r8, r9}
+@@ -2678,6 +2672,9 @@ pixman_asm_function fname
+     ldmia     ip, {WB, X, UX, WIDTH}
+     mul       PF_OFFS, PF_OFFS, UX
+ 
++    sub       STRIDE, BOTTOM, TOP
++    .unreq    BOTTOM
++
+     cmp       WIDTH, #0
+     ble       3f
+ 
+@@ -2738,7 +2735,6 @@ pixman_asm_function fname
+ 
+     .unreq    OUT
+     .unreq    TOP
+-    .unreq    BOTTOM
+     .unreq    WT
+     .unreq    WB
+     .unreq    X
+@@ -2749,6 +2745,7 @@ pixman_asm_function fname
+     .unreq    PF_OFFS
+     .unreq    TMP3
+     .unreq    TMP4
++    .unreq    STRIDE
+ .endfunc
+ 
+ .endm
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch b/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch
new file mode 100644
index 0000000000..82661f0869
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0034-ARM-support-different-levels-of-loop-unrolling-in-bi.patch
@@ -0,0 +1,156 @@
+From cd20ceb7602348ecbfa0db1756dc548a0bad3c9d Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 17 Mar 2011 19:42:01 +0200
+Subject: [PATCH 34/40] ARM: support different levels of loop unrolling in bilinear scaler
+
+Now an extra 'flags' parameter is supported in the bilinear scanline
+scaling function generation macro. It can be used to enable 4 or 8
+pixels per loop iteration unrolling and provide save/restore code for
+d8-d15 registers.
+---
+ pixman/pixman-arm-neon-asm.S |   84 ++++++++++++++++++++++++++++++++++++++----
+ 1 files changed, 76 insertions(+), 8 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 9878bf7..6141770 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2633,6 +2633,36 @@ fname:
+ .endif
+ .endm
+ 
++.macro bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_head
++.else
++    bilinear_interpolate_four_pixels_head src_fmt, dst_fmt
++    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++.endif
++.endm
++
++.macro bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail
++.else
++    bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
++.endif
++.endm
++
++.macro bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
++.ifdef have_bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt
++    bilinear_interpolate_eight_pixels_&src_fmt&_&dst_fmt&_tail_head
++.else
++    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++    bilinear_interpolate_four_pixels_tail_head src_fmt, dst_fmt
++.endif
++.endm
++
++.set BILINEAR_FLAG_UNROLL_4,          0
++.set BILINEAR_FLAG_UNROLL_8,          1
++.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2
++
+ /*
+  * Main template macro for generating NEON optimized bilinear scanline
+  * functions.
+@@ -2648,7 +2678,7 @@ fname:
+ 
+ .macro generate_bilinear_scanline_func fname, src_fmt, dst_fmt, \
+                                        src_bpp_shift, dst_bpp_shift, \
+-                                       prefetch_distance
++                                       prefetch_distance, flags
+ 
+ pixman_asm_function fname
+     OUT       .req      r0
+@@ -2672,6 +2702,10 @@ pixman_asm_function fname
+     ldmia     ip, {WB, X, UX, WIDTH}
+     mul       PF_OFFS, PF_OFFS, UX
+ 
++.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
++    vpush     {d8-d15}
++.endif
++
+     sub       STRIDE, BOTTOM, TOP
+     .unreq    BOTTOM
+ 
+@@ -2705,8 +2739,34 @@ pixman_asm_function fname
+     bilinear_interpolate_two_pixels src_fmt, dst_fmt
+     sub       WIDTH, WIDTH, #2
+ 0:
+-
+-    /* start the main loop */
++.if ((flags) & BILINEAR_FLAG_UNROLL_8) != 0
++/*********** 8 pixels per iteration *****************/
++    cmp       WIDTH, #4
++    blt       0f
++    tst       OUT, #(1 << (dst_bpp_shift + 2))
++    beq       0f
++    bilinear_interpolate_four_pixels src_fmt, dst_fmt
++    sub       WIDTH, WIDTH, #4
++0:
++    subs      WIDTH, WIDTH, #8
++    blt       1f
++    mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
++    bilinear_interpolate_eight_pixels_head src_fmt, dst_fmt
++    subs      WIDTH, WIDTH, #8
++    blt       5f
++0:
++    bilinear_interpolate_eight_pixels_tail_head src_fmt, dst_fmt
++    subs      WIDTH, WIDTH, #8
++    bge       0b
++5:
++    bilinear_interpolate_eight_pixels_tail src_fmt, dst_fmt
++1:
++    tst       WIDTH, #4
++    beq       2f
++    bilinear_interpolate_four_pixels src_fmt, dst_fmt
++2:
++.else
++/*********** 4 pixels per iteration *****************/
+     subs      WIDTH, WIDTH, #4
+     blt       1f
+     mov       PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift)
+@@ -2720,7 +2780,8 @@ pixman_asm_function fname
+ 5:
+     bilinear_interpolate_four_pixels_tail src_fmt, dst_fmt
+ 1:
+-
++/****************************************************/
++.endif
+     /* handle the remaining trailing pixels */
+     tst       WIDTH, #2
+     beq       2f
+@@ -2730,6 +2791,9 @@ pixman_asm_function fname
+     beq       3f
+     bilinear_interpolate_last_pixel src_fmt, dst_fmt
+ 3:
++.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
++    vpop      {d8-d15}
++.endif
+     pop       {r4, r5, r6, r7, r8, r9}
+     bx        lr
+ 
+@@ -2751,13 +2815,17 @@ pixman_asm_function fname
+ .endm
+ 
+ generate_bilinear_scanline_func \
+-    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, 2, 2, 28
++    pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
++    2, 2, 28, BILINEAR_FLAG_UNROLL_4
+ 
+ generate_bilinear_scanline_func \
+-    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, 2, 1, 28
++    pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \
++    2, 1, 28, BILINEAR_FLAG_UNROLL_4
+ 
+ generate_bilinear_scanline_func \
+-    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, 1, 2, 28
++    pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \
++    1, 2, 28, BILINEAR_FLAG_UNROLL_4
+ 
+ generate_bilinear_scanline_func \
+-    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, 1, 1, 28
++    pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_neon, 0565, 0565, \
++    1, 1, 28, BILINEAR_FLAG_UNROLL_4
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0035-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch b/recipes/xorg-lib/pixman-0.21.6/0035-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch
new file mode 100644
index 0000000000..c0d485cae4
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0035-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch
@@ -0,0 +1,166 @@
+From d3b1ca20fe8af20ca097dcc8799ef25cee03dd6b Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 16 Mar 2011 17:24:49 +0200
+Subject: [PATCH 35/40] ARM: pipelined NEON implementation of bilinear scaled 'src_8888_8888'
+
+Performance of the inner loop when working with the data in L1 cache:
+    ARM Cortex-A8: 41 cycles per 4 pixels (no stalls and partial dual issue)
+    ARM Cortex-A9: 48 cycles per 4 pixels (no stalls)
+
+It might still be possible to improve performance even further on ARM
+Cortex-A8 with better use of dual issue.
+
+Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=40.38 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=48.47 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=20028888, speed=79.68 MPix/s
+  after:  op=1, src=20028888, dst=20028888, speed=93.11 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |  127 ++++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 127 insertions(+), 0 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 6141770..326e085 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2814,6 +2814,133 @@ pixman_asm_function fname
+ 
+ .endm
+ 
++/*****************************************************************************/
++
++.set have_bilinear_interpolate_four_pixels_8888_8888, 1
++
++.macro bilinear_interpolate_four_pixels_8888_8888_head
++    mov       TMP1, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP2, TOP, TMP2, asl #2
++
++    vld1.32   {d22}, [TMP1], STRIDE
++    vld1.32   {d23}, [TMP1]
++    mov       TMP3, X, asr #16
++    add       X, X, UX
++    add       TMP3, TOP, TMP3, asl #2
++    vmull.u8  q8, d22, d28
++    vmlal.u8  q8, d23, d29
++
++    vld1.32   {d22}, [TMP2], STRIDE
++    vld1.32   {d23}, [TMP2]
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP4, TOP, TMP4, asl #2
++    vmull.u8  q9, d22, d28
++    vmlal.u8  q9, d23, d29
++
++    vld1.32   {d22}, [TMP3], STRIDE
++    vld1.32   {d23}, [TMP3]
++    vmull.u8  q10, d22, d28
++    vmlal.u8  q10, d23, d29
++
++    vshll.u16 q0, d16, #8
++    vmlsl.u16 q0, d16, d30
++    vmlal.u16 q0, d17, d30
++
++    pld       [TMP4, PF_OFFS]
++    vld1.32   {d16}, [TMP4], STRIDE
++    vld1.32   {d17}, [TMP4]
++    pld       [TMP4, PF_OFFS]
++    vmull.u8  q11, d16, d28
++    vmlal.u8  q11, d17, d29
++
++    vshll.u16 q1, d18, #8
++    vmlsl.u16 q1, d18, d31
++.endm
++
++.macro bilinear_interpolate_four_pixels_8888_8888_tail
++    vmlal.u16 q1, d19, d31
++    vshr.u16  q15, q12, #8
++    vshll.u16 q2, d20, #8
++    vmlsl.u16 q2, d20, d30
++    vmlal.u16 q2, d21, d30
++    vshll.u16 q3, d22, #8
++    vmlsl.u16 q3, d22, d31
++    vmlal.u16 q3, d23, d31
++    vadd.u16  q12, q12, q13
++    vshrn.u32 d0, q0, #16
++    vshrn.u32 d1, q1, #16
++    vshrn.u32 d4, q2, #16
++    vshr.u16  q15, q12, #8
++    vshrn.u32 d5, q3, #16
++    vmovn.u16 d6, q0
++    vmovn.u16 d7, q2
++    vadd.u16  q12, q12, q13
++    vst1.32   {d6, d7}, [OUT, :128]!
++.endm
++
++.macro bilinear_interpolate_four_pixels_8888_8888_tail_head
++    mov       TMP1, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP2, TOP, TMP2, asl #2
++        vmlal.u16 q1, d19, d31
++        vshr.u16  q15, q12, #8
++        vshll.u16 q2, d20, #8
++        vmlsl.u16 q2, d20, d30
++        vmlal.u16 q2, d21, d30
++        vshll.u16 q3, d22, #8
++    vld1.32   {d20}, [TMP1], STRIDE
++        vmlsl.u16 q3, d22, d31
++        vmlal.u16 q3, d23, d31
++    vld1.32   {d21}, [TMP1]
++    vmull.u8  q8, d20, d28
++    vmlal.u8  q8, d21, d29
++        vshrn.u32 d0, q0, #16
++        vshrn.u32 d1, q1, #16
++        vshrn.u32 d4, q2, #16
++    vld1.32   {d22}, [TMP2], STRIDE
++        vshrn.u32 d5, q3, #16
++        vadd.u16  q12, q12, q13
++    vld1.32   {d23}, [TMP2]
++    vmull.u8  q9, d22, d28
++    mov       TMP3, X, asr #16
++    add       X, X, UX
++    add       TMP3, TOP, TMP3, asl #2
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP4, TOP, TMP4, asl #2
++    vmlal.u8  q9, d23, d29
++    vld1.32   {d22}, [TMP3], STRIDE
++        vshr.u16  q15, q12, #8
++    vld1.32   {d23}, [TMP3]
++    vmull.u8  q10, d22, d28
++    vmlal.u8  q10, d23, d29
++        vmovn.u16 d6, q0
++    vshll.u16 q0, d16, #8
++        vmovn.u16 d7, q2
++    vmlsl.u16 q0, d16, d30
++    vmlal.u16 q0, d17, d30
++    pld       [TMP4, PF_OFFS]
++    vld1.32   {d16}, [TMP4], STRIDE
++        vadd.u16  q12, q12, q13
++    vld1.32   {d17}, [TMP4]
++    pld       [TMP4, PF_OFFS]
++    vmull.u8  q11, d16, d28
++    vmlal.u8  q11, d17, d29
++        vst1.32   {d6, d7}, [OUT, :128]!
++    vshll.u16 q1, d18, #8
++    vmlsl.u16 q1, d18, d31
++.endm
++
++/*****************************************************************************/
++
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
+     2, 2, 28, BILINEAR_FLAG_UNROLL_4
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0036-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch b/recipes/xorg-lib/pixman-0.21.6/0036-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch
new file mode 100644
index 0000000000..4fca16fb9e
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0036-ARM-pipelined-NEON-implementation-of-bilinear-scaled.patch
@@ -0,0 +1,283 @@
+From dfccf9b97acbff6e847e4e52c5dec0a4297d30a0 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Mon, 21 Mar 2011 20:25:27 +0200
+Subject: [PATCH 36/40] ARM: pipelined NEON implementation of bilinear scaled 'src_8888_0565'
+
+Benchmark on ARM Cortex-A8 r1p3 @600MHz, 32-bit LPDDR @166MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=10020565, speed=33.59 MPix/s
+  after:  op=1, src=20028888, dst=10020565, speed=46.25 MPix/s
+
+Benchmark on ARM Cortex-A8 r2p2 @1GHz, 32-bit LPDDR @200MHz:
+ Microbenchmark (scaling 2000x2000 image with scale factor close to 1x):
+  before: op=1, src=20028888, dst=10020565, speed=63.86 MPix/s
+  after:  op=1, src=20028888, dst=10020565, speed=84.22 MPix/s
+---
+ pixman/pixman-arm-neon-asm.S |  245 +++++++++++++++++++++++++++++++++++++++++-
+ 1 files changed, 244 insertions(+), 1 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
+index 326e085..e560bdf 100644
+--- a/pixman/pixman-arm-neon-asm.S
++++ b/pixman/pixman-arm-neon-asm.S
+@@ -2941,13 +2941,256 @@ pixman_asm_function fname
+ 
+ /*****************************************************************************/
+ 
++.set have_bilinear_interpolate_eight_pixels_8888_0565, 1
++
++.macro bilinear_interpolate_eight_pixels_8888_0565_head
++    mov       TMP1, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP2, TOP, TMP2, asl #2
++    vld1.32   {d20}, [TMP1], STRIDE
++    vld1.32   {d21}, [TMP1]
++    vmull.u8  q8, d20, d28
++    vmlal.u8  q8, d21, d29
++    vld1.32   {d22}, [TMP2], STRIDE
++    vld1.32   {d23}, [TMP2]
++    vmull.u8  q9, d22, d28
++    mov       TMP3, X, asr #16
++    add       X, X, UX
++    add       TMP3, TOP, TMP3, asl #2
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP4, TOP, TMP4, asl #2
++    vmlal.u8  q9, d23, d29
++    vld1.32   {d22}, [TMP3], STRIDE
++    vld1.32   {d23}, [TMP3]
++    vmull.u8  q10, d22, d28
++    vmlal.u8  q10, d23, d29
++    vshll.u16 q0, d16, #8
++    vmlsl.u16 q0, d16, d30
++    vmlal.u16 q0, d17, d30
++    pld       [TMP4, PF_OFFS]
++    vld1.32   {d16}, [TMP4], STRIDE
++    vld1.32   {d17}, [TMP4]
++    pld       [TMP4, PF_OFFS]
++    vmull.u8  q11, d16, d28
++    vmlal.u8  q11, d17, d29
++    vshll.u16 q1, d18, #8
++    vmlsl.u16 q1, d18, d31
++
++    mov       TMP1, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP2, TOP, TMP2, asl #2
++        vmlal.u16 q1, d19, d31
++        vshr.u16  q15, q12, #8
++        vshll.u16 q2, d20, #8
++        vmlsl.u16 q2, d20, d30
++        vmlal.u16 q2, d21, d30
++        vshll.u16 q3, d22, #8
++    vld1.32   {d20}, [TMP1], STRIDE
++        vmlsl.u16 q3, d22, d31
++        vmlal.u16 q3, d23, d31
++    vld1.32   {d21}, [TMP1]
++    vmull.u8  q8, d20, d28
++    vmlal.u8  q8, d21, d29
++        vshrn.u32 d0, q0, #16
++        vshrn.u32 d1, q1, #16
++        vshrn.u32 d4, q2, #16
++    vld1.32   {d22}, [TMP2], STRIDE
++        vshrn.u32 d5, q3, #16
++        vadd.u16  q12, q12, q13
++    vld1.32   {d23}, [TMP2]
++    vmull.u8  q9, d22, d28
++    mov       TMP3, X, asr #16
++    add       X, X, UX
++    add       TMP3, TOP, TMP3, asl #2
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP4, TOP, TMP4, asl #2
++    vmlal.u8  q9, d23, d29
++    vld1.32   {d22}, [TMP3], STRIDE
++        vshr.u16  q15, q12, #8
++    vld1.32   {d23}, [TMP3]
++    vmull.u8  q10, d22, d28
++    vmlal.u8  q10, d23, d29
++        vmovn.u16 d8, q0
++    vshll.u16 q0, d16, #8
++        vmovn.u16 d9, q2
++    vmlsl.u16 q0, d16, d30
++    vmlal.u16 q0, d17, d30
++    pld       [TMP4, PF_OFFS]
++    vld1.32   {d16}, [TMP4], STRIDE
++        vadd.u16  q12, q12, q13
++    vld1.32   {d17}, [TMP4]
++    pld       [TMP4, PF_OFFS]
++    vmull.u8  q11, d16, d28
++    vmlal.u8  q11, d17, d29
++    vshll.u16 q1, d18, #8
++    vmlsl.u16 q1, d18, d31
++.endm
++
++.macro bilinear_interpolate_eight_pixels_8888_0565_tail
++    vmlal.u16 q1, d19, d31
++    vshr.u16  q15, q12, #8
++    vshll.u16 q2, d20, #8
++    vmlsl.u16 q2, d20, d30
++    vmlal.u16 q2, d21, d30
++    vshll.u16 q3, d22, #8
++    vmlsl.u16 q3, d22, d31
++    vmlal.u16 q3, d23, d31
++    vadd.u16  q12, q12, q13
++    vshrn.u32 d0, q0, #16
++    vshrn.u32 d1, q1, #16
++    vshrn.u32 d4, q2, #16
++    vshr.u16  q15, q12, #8
++    vshrn.u32 d5, q3, #16
++    vmovn.u16 d10, q0
++    vmovn.u16 d11, q2
++    vadd.u16  q12, q12, q13
++
++    vuzp.u8   d8, d9
++    vuzp.u8   d10, d11
++    vuzp.u8   d9, d11
++    vuzp.u8   d8, d10
++    vshll.u8  q6, d9, #8
++    vshll.u8  q5, d10, #8
++    vshll.u8  q7, d8, #8
++    vsri.u16  q5, q6, #5
++    vsri.u16  q5, q7, #11
++    vst1.32   {d10, d11}, [OUT, :128]!
++.endm
++
++.macro bilinear_interpolate_eight_pixels_8888_0565_tail_head
++    mov       TMP1, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP2, TOP, TMP2, asl #2
++        vmlal.u16 q1, d19, d31
++        vshr.u16  q15, q12, #8
++            vuzp.u8 d8, d9
++        vshll.u16 q2, d20, #8
++        vmlsl.u16 q2, d20, d30
++        vmlal.u16 q2, d21, d30
++        vshll.u16 q3, d22, #8
++    vld1.32   {d20}, [TMP1], STRIDE
++        vmlsl.u16 q3, d22, d31
++        vmlal.u16 q3, d23, d31
++    vld1.32   {d21}, [TMP1]
++    vmull.u8  q8, d20, d28
++    vmlal.u8  q8, d21, d29
++        vshrn.u32 d0, q0, #16
++        vshrn.u32 d1, q1, #16
++        vshrn.u32 d4, q2, #16
++    vld1.32   {d22}, [TMP2], STRIDE
++        vshrn.u32 d5, q3, #16
++        vadd.u16  q12, q12, q13
++    vld1.32   {d23}, [TMP2]
++    vmull.u8  q9, d22, d28
++    mov       TMP3, X, asr #16
++    add       X, X, UX
++    add       TMP3, TOP, TMP3, asl #2
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP4, TOP, TMP4, asl #2
++    vmlal.u8  q9, d23, d29
++    vld1.32   {d22}, [TMP3], STRIDE
++        vshr.u16  q15, q12, #8
++    vld1.32   {d23}, [TMP3]
++    vmull.u8  q10, d22, d28
++    vmlal.u8  q10, d23, d29
++        vmovn.u16 d10, q0
++    vshll.u16 q0, d16, #8
++        vmovn.u16 d11, q2
++    vmlsl.u16 q0, d16, d30
++    vmlal.u16 q0, d17, d30
++    pld       [TMP4, PF_OFFS]
++    vld1.32   {d16}, [TMP4], STRIDE
++        vadd.u16  q12, q12, q13
++    vld1.32   {d17}, [TMP4]
++    pld       [TMP4, PF_OFFS]
++    vmull.u8  q11, d16, d28
++    vmlal.u8  q11, d17, d29
++            vuzp.u8 d10, d11
++    vshll.u16 q1, d18, #8
++    vmlsl.u16 q1, d18, d31
++
++    mov       TMP1, X, asr #16
++    add       X, X, UX
++    add       TMP1, TOP, TMP1, asl #2
++    mov       TMP2, X, asr #16
++    add       X, X, UX
++    add       TMP2, TOP, TMP2, asl #2
++        vmlal.u16 q1, d19, d31
++            vuzp.u8 d9, d11
++        vshr.u16  q15, q12, #8
++        vshll.u16 q2, d20, #8
++            vuzp.u8 d8, d10
++        vmlsl.u16 q2, d20, d30
++        vmlal.u16 q2, d21, d30
++        vshll.u16 q3, d22, #8
++    vld1.32   {d20}, [TMP1], STRIDE
++        vmlsl.u16 q3, d22, d31
++        vmlal.u16 q3, d23, d31
++    vld1.32   {d21}, [TMP1]
++    vmull.u8  q8, d20, d28
++    vmlal.u8  q8, d21, d29
++            vshll.u8  q6, d9, #8
++            vshll.u8  q5, d10, #8
++            vshll.u8  q7, d8, #8
++        vshrn.u32 d0, q0, #16
++            vsri.u16  q5, q6, #5
++        vshrn.u32 d1, q1, #16
++            vsri.u16  q5, q7, #11
++        vshrn.u32 d4, q2, #16
++    vld1.32   {d22}, [TMP2], STRIDE
++        vshrn.u32 d5, q3, #16
++        vadd.u16  q12, q12, q13
++    vld1.32   {d23}, [TMP2]
++    vmull.u8  q9, d22, d28
++    mov       TMP3, X, asr #16
++    add       X, X, UX
++    add       TMP3, TOP, TMP3, asl #2
++    mov       TMP4, X, asr #16
++    add       X, X, UX
++    add       TMP4, TOP, TMP4, asl #2
++    vmlal.u8  q9, d23, d29
++    vld1.32   {d22}, [TMP3], STRIDE
++        vshr.u16  q15, q12, #8
++    vld1.32   {d23}, [TMP3]
++    vmull.u8  q10, d22, d28
++    vmlal.u8  q10, d23, d29
++        vmovn.u16 d8, q0
++    vshll.u16 q0, d16, #8
++        vmovn.u16 d9, q2
++    vmlsl.u16 q0, d16, d30
++    vmlal.u16 q0, d17, d30
++    pld       [TMP4, PF_OFFS]
++    vld1.32   {d16}, [TMP4], STRIDE
++        vadd.u16  q12, q12, q13
++    vld1.32   {d17}, [TMP4]
++    pld       [TMP4, PF_OFFS]
++    vmull.u8  q11, d16, d28
++    vmlal.u8  q11, d17, d29
++    vshll.u16 q1, d18, #8
++            vst1.32   {d10, d11}, [OUT, :128]!
++    vmlsl.u16 q1, d18, d31
++.endm
++/*****************************************************************************/
++
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_neon, 8888, 8888, \
+     2, 2, 28, BILINEAR_FLAG_UNROLL_4
+ 
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_neon, 8888, 0565, \
+-    2, 1, 28, BILINEAR_FLAG_UNROLL_4
++    2, 1, 28, BILINEAR_FLAG_UNROLL_8 | BILINEAR_FLAG_USE_ALL_NEON_REGS
+ 
+ generate_bilinear_scanline_func \
+     pixman_scaled_bilinear_scanline_0565_x888_SRC_asm_neon, 0565, 8888, \
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0037-Generic-C-implementation-of-pixman_blt-with-overlapp.patch b/recipes/xorg-lib/pixman-0.21.6/0037-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
new file mode 100644
index 0000000000..e03823b185
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0037-Generic-C-implementation-of-pixman_blt-with-overlapp.patch
@@ -0,0 +1,114 @@
+From ab52f97fa306f73b51f797a33614280d31ccb978 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Tue, 16 Mar 2010 16:55:28 +0100
+Subject: [PATCH 37/40] Generic C implementation of pixman_blt with overlapping support
+
+Uses memcpy/memmove functions to copy pixels, can handle the
+case when both source and destination areas are in the same
+image (this is useful for scrolling).
+
+It is assumed that the copying direction only matters when
+using the same image for both source and destination (and
+src_stride == dst_stride). The copying direction is undefined
+for images with different source and destination strides
+whose areas happen to overlap (but this is an unrealistic
+case anyway).
+---
+ pixman/pixman-general.c |   21 ++++++++++++++++++---
+ pixman/pixman-private.h |   43 +++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 61 insertions(+), 3 deletions(-)
+
+diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
+index 727affc..fa448f7 100644
+--- a/pixman/pixman-general.c
++++ b/pixman/pixman-general.c
+@@ -238,9 +238,24 @@ general_blt (pixman_implementation_t *imp,
+              int                      width,
+              int                      height)
+ {
+-    /* We can't blit unless we have sse2 or mmx */
+-
+-    return FALSE;
++    uint8_t *dst_bytes = (uint8_t *)dst_bits;
++    uint8_t *src_bytes = (uint8_t *)src_bits;
++    int bpp;
++
++    if (src_bpp != dst_bpp || src_bpp & 7)
++	return FALSE;
++
++    bpp = src_bpp >> 3;
++    width *= bpp;
++    src_stride *= 4;
++    dst_stride *= 4;
++    pixman_blt_helper (src_bytes + src_y * src_stride + src_x * bpp,
++                       dst_bytes + dst_y * dst_stride + dst_x * bpp,
++                       src_stride,
++                       dst_stride,
++                       width,
++                       height);
++    return TRUE;
+ }
+ 
+ static pixman_bool_t
+diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
+index 60060a9..5369ad9 100644
+--- a/pixman/pixman-private.h
++++ b/pixman/pixman-private.h
+@@ -10,6 +10,7 @@
+ 
+ #include "pixman.h"
+ #include <time.h>
++#include <string.h>
+ #include <assert.h>
+ #include <stdio.h>
+ #include <string.h>
+@@ -899,4 +900,46 @@ void pixman_timer_register (pixman_timer_t *timer);
+ 
+ #endif /* PIXMAN_TIMERS */
+ 
++/* a helper function: copies rows of bytes, with src/dst overlapping support */
++static inline void
++pixman_blt_helper (uint8_t *src_bytes,
++                   uint8_t *dst_bytes,
++                   int      src_stride,
++                   int      dst_stride,
++                   int      width,
++                   int      height)
++{
++    /*
++     * The second part of this check is not strictly needed, but it prevents
++     * unnecessary upside-down processing of areas which belong to different
++     * images. Upside-down processing can be slower with fixed-distance-ahead
++     * prefetch and perceived as having more tearing.
++     */
++    if (src_bytes < dst_bytes + width &&
++	src_bytes + src_stride * height > dst_bytes)
++    {
++	src_bytes += src_stride * height - src_stride;
++	dst_bytes += dst_stride * height - dst_stride;
++	dst_stride = -dst_stride;
++	src_stride = -src_stride;
++	/* Horizontal scrolling to the left needs memmove */
++	if (src_bytes + width > dst_bytes)
++	{
++	    while (--height >= 0)
++	    {
++		memmove (dst_bytes, src_bytes, width);
++		dst_bytes += dst_stride;
++		src_bytes += src_stride;
++	    }
++	    return;
++	}
++    }
++    while (--height >= 0)
++    {
++	memcpy (dst_bytes, src_bytes, width);
++	dst_bytes += dst_stride;
++	src_bytes += src_stride;
++    }
++}
++
+ #endif /* PIXMAN_PRIVATE_H */
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0038-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch b/recipes/xorg-lib/pixman-0.21.6/0038-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
new file mode 100644
index 0000000000..7c0f7ad5bd
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0038-Support-of-overlapping-src-dst-for-pixman_blt_mmx.patch
@@ -0,0 +1,91 @@
+From 2cde9110695c2b595eaf885eee40b118286652f9 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 22 Oct 2009 05:45:47 +0300
+Subject: [PATCH 38/40] Support of overlapping src/dst for pixman_blt_mmx
+
+---
+ pixman/pixman-mmx.c |   55 +++++++++++++++++++++++++++++---------------------
+ 1 files changed, 32 insertions(+), 23 deletions(-)
+
+diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
+index 0272347..5bcbd0e 100644
+--- a/pixman/pixman-mmx.c
++++ b/pixman/pixman-mmx.c
+@@ -2996,34 +2996,43 @@ pixman_blt_mmx (uint32_t *src_bits,
+ {
+     uint8_t *   src_bytes;
+     uint8_t *   dst_bytes;
+-    int byte_width;
++    int         bpp;
+ 
+-    if (src_bpp != dst_bpp)
++    if (src_bpp != dst_bpp || src_bpp & 7)
+ 	return FALSE;
+ 
+-    if (src_bpp == 16)
+-    {
+-	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+-	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+-	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+-	byte_width = 2 * width;
+-	src_stride *= 2;
+-	dst_stride *= 2;
+-    }
+-    else if (src_bpp == 32)
++    bpp = src_bpp >> 3;
++    width *= bpp;
++    src_stride *= 4;
++    dst_stride *= 4;
++    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++    if (src_bpp != 16 && src_bpp != 32)
+     {
+-	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+-	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+-	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+-	byte_width = 4 * width;
+-	src_stride *= 4;
+-	dst_stride *= 4;
++	pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++	                   width, height);
++	return TRUE;
+     }
+-    else
++
++    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
+     {
+-	return FALSE;
++	src_bytes += src_stride * height - src_stride;
++	dst_bytes += dst_stride * height - dst_stride;
++	dst_stride = -dst_stride;
++	src_stride = -src_stride;
++
++	if (src_bytes + width > dst_bytes)
++	{
++	    /* TODO: reverse scanline copy using MMX */
++	    while (--height >= 0)
++	    {
++		memmove (dst_bytes, src_bytes, width);
++		dst_bytes += dst_stride;
++		src_bytes += src_stride;
++	    }
++	    return TRUE;
++	}
+     }
+ 
+     while (height--)
+@@ -3033,7 +3042,7 @@ pixman_blt_mmx (uint32_t *src_bits,
+ 	uint8_t *d = dst_bytes;
+ 	src_bytes += src_stride;
+ 	dst_bytes += dst_stride;
+-	w = byte_width;
++	w = width;
+ 
+ 	while (w >= 2 && ((unsigned long)d & 3))
+ 	{
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0039-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch b/recipes/xorg-lib/pixman-0.21.6/0039-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
new file mode 100644
index 0000000000..8e89ffeabb
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0039-Support-of-overlapping-src-dst-for-pixman_blt_sse2.patch
@@ -0,0 +1,91 @@
+From b4064e256d293d32035494a6afff1bc9456b84e1 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Thu, 22 Oct 2009 05:45:54 +0300
+Subject: [PATCH 39/40] Support of overlapping src/dst for pixman_blt_sse2
+
+---
+ pixman/pixman-sse2.c |   55 +++++++++++++++++++++++++++++--------------------
+ 1 files changed, 32 insertions(+), 23 deletions(-)
+
+diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
+index 533b858..9fa7191 100644
+--- a/pixman/pixman-sse2.c
++++ b/pixman/pixman-sse2.c
+@@ -4691,34 +4691,43 @@ pixman_blt_sse2 (uint32_t *src_bits,
+ {
+     uint8_t *   src_bytes;
+     uint8_t *   dst_bytes;
+-    int byte_width;
++    int         bpp;
+ 
+-    if (src_bpp != dst_bpp)
++    if (src_bpp != dst_bpp || src_bpp & 7)
+ 	return FALSE;
+ 
+-    if (src_bpp == 16)
+-    {
+-	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+-	src_bytes =(uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+-	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+-	byte_width = 2 * width;
+-	src_stride *= 2;
+-	dst_stride *= 2;
+-    }
+-    else if (src_bpp == 32)
++    bpp = src_bpp >> 3;
++    width *= bpp;
++    src_stride *= 4;
++    dst_stride *= 4;
++    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++    if (src_bpp != 16 && src_bpp != 32)
+     {
+-	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+-	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+-	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+-	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+-	byte_width = 4 * width;
+-	src_stride *= 4;
+-	dst_stride *= 4;
++	pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++	                   width, height);
++	return TRUE;
+     }
+-    else
++
++    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
+     {
+-	return FALSE;
++	src_bytes += src_stride * height - src_stride;
++	dst_bytes += dst_stride * height - dst_stride;
++	dst_stride = -dst_stride;
++	src_stride = -src_stride;
++
++	if (src_bytes + width > dst_bytes)
++	{
++	    /* TODO: reverse scanline copy using SSE2 */
++	    while (--height >= 0)
++	    {
++		memmove (dst_bytes, src_bytes, width);
++		dst_bytes += dst_stride;
++		src_bytes += src_stride;
++	    }
++	    return TRUE;
++	}
+     }
+ 
+     while (height--)
+@@ -4728,7 +4737,7 @@ pixman_blt_sse2 (uint32_t *src_bits,
+ 	uint8_t *d = dst_bytes;
+ 	src_bytes += src_stride;
+ 	dst_bytes += dst_stride;
+-	w = byte_width;
++	w = width;
+ 
+ 	while (w >= 2 && ((unsigned long)d & 3))
+ 	{
+-- 
+1.6.6.1
+
diff --git a/recipes/xorg-lib/pixman-0.21.6/0040-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch b/recipes/xorg-lib/pixman-0.21.6/0040-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
new file mode 100644
index 0000000000..38aeadb2dc
--- /dev/null
+++ b/recipes/xorg-lib/pixman-0.21.6/0040-Support-of-overlapping-src-dst-for-pixman_blt_neon.patch
@@ -0,0 +1,94 @@
+From ed32d593a0e8aa56f8a27f976f188d14a79343a0 Mon Sep 17 00:00:00 2001
+From: Siarhei Siamashka <siarhei.siamashka@nokia.com>
+Date: Wed, 18 Nov 2009 06:08:48 +0200
+Subject: [PATCH 40/40] Support of overlapping src/dst for pixman_blt_neon
+
+---
+ pixman/pixman-arm-neon.c |   62 +++++++++++++++++++++++++++++++++++++--------
+ 1 files changed, 51 insertions(+), 11 deletions(-)
+
+diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
+index 0a10ca1..f015eee 100644
+--- a/pixman/pixman-arm-neon.c
++++ b/pixman/pixman-arm-neon.c
+@@ -215,26 +215,66 @@ pixman_blt_neon (uint32_t *src_bits,
+                  int       width,
+                  int       height)
+ {
+-    if (src_bpp != dst_bpp)
++    uint8_t *   src_bytes;
++    uint8_t *   dst_bytes;
++    int         bpp;
++
++    if (src_bpp != dst_bpp || src_bpp & 7)
+ 	return FALSE;
+ 
++    bpp = src_bpp >> 3;
++    width *= bpp;
++    src_stride *= 4;
++    dst_stride *= 4;
++    src_bytes = (uint8_t *)src_bits + src_y * src_stride + src_x * bpp;
++    dst_bytes = (uint8_t *)dst_bits + dst_y * dst_stride + dst_x * bpp;
++
++    if (src_bpp != 16 && src_bpp != 32)
++    {
++	pixman_blt_helper (src_bytes, dst_bytes, src_stride, dst_stride,
++	                   width, height);
++	return TRUE;
++    }
++
++    if (src_bytes < dst_bytes && src_bytes + src_stride * height > dst_bytes)
++    {
++	src_bytes += src_stride * height - src_stride;
++	dst_bytes += dst_stride * height - dst_stride;
++	dst_stride = -dst_stride;
++	src_stride = -src_stride;
++
++	if (src_bytes + width > dst_bytes)
++	{
++	    /* TODO: reverse scanline copy using NEON */
++	    while (--height >= 0)
++	    {
++		memmove (dst_bytes, src_bytes, width);
++		dst_bytes += dst_stride;
++		src_bytes += src_stride;
++	    }
++	    return TRUE;
++	}
++    }
++
+     switch (src_bpp)
+     {
+     case 16:
+ 	pixman_composite_src_0565_0565_asm_neon (
+-		width, height,
+-		(uint16_t *)(((char *) dst_bits) +
+-		dst_y * dst_stride * 4 + dst_x * 2), dst_stride * 2,
+-		(uint16_t *)(((char *) src_bits) +
+-		src_y * src_stride * 4 + src_x * 2), src_stride * 2);
++		width >> 1,
++		height,
++		(uint16_t *) dst_bytes,
++		dst_stride >> 1,
++		(uint16_t *) src_bytes,
++		src_stride >> 1);
+ 	return TRUE;
+     case 32:
+ 	pixman_composite_src_8888_8888_asm_neon (
+-		width, height,
+-		(uint32_t *)(((char *) dst_bits) +
+-		dst_y * dst_stride * 4 + dst_x * 4), dst_stride,
+-		(uint32_t *)(((char *) src_bits) +
+-		src_y * src_stride * 4 + src_x * 4), src_stride);
++		width >> 2,
++		height,
++		(uint32_t *) dst_bytes,
++		dst_stride >> 2,
++		(uint32_t *) src_bytes,
++		src_stride >> 2);
+ 	return TRUE;
+     default:
+ 	return FALSE;
+-- 
+1.6.6.1
+
author	Koen Kooi <koen@openembedded.org>	2011-04-05 13:00:12 +0200
committer	Koen Kooi <koen@openembedded.org>	2011-04-05 15:07:59 +0200
commit	c3265b14b23e1aec54f7794e753b28f0d0622d86 (patch)
tree	c84f6cda614e47d02328b19eccb99bb6c6b34aeb /recipes/xorg-lib/pixman-0.21.6
parent	84f0436d63aef5fce34eb0c6d5b07a4e312b7049 (diff)
download	openembedded-c3265b14b23e1aec54f7794e753b28f0d0622d86.tar.gz