aboutsummaryrefslogtreecommitdiffstats
path: root/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch
diff options
context:
space:
mode:
authorKoen Kooi <koen@dominion.thruhere.net>2011-10-18 11:01:20 +0200
committerKoen Kooi <koen@dominion.thruhere.net>2011-10-18 11:18:12 +0200
commitef33e684652db8b8fbc381a8ddd8e90c02f502df (patch)
tree113baa0ea80c4c48f3c0bead6af56fe78fe9dfb6 /meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch
parent7b6e75d043a833b059ec6a39bf998999ef8f3d9e (diff)
downloadmeta-openembedded-contrib-ef33e684652db8b8fbc381a8ddd8e90c02f502df.tar.gz
pixman 0.23.6: refresh patches with versions for pixman master
Signed-off-by: Koen Kooi <koen@dominion.thruhere.net>
Diffstat (limited to 'meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch')
-rw-r--r--meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch235
1 files changed, 0 insertions, 235 deletions
diff --git a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch b/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch
deleted file mode 100644
index dc8a69f7497..00000000000
--- a/meta-oe/recipes-graphics/xorg-lib/pixman-0.23.6/0004-ARM-NEON-Bilinear-macro-template-for-instruction-sch.patch
+++ /dev/null
@@ -1,235 +0,0 @@
-From 524d1cc7acb753167fffdd08d8c10bf71e0634ba Mon Sep 17 00:00:00 2001
-From: Taekyun Kim <tkq.kim@samsung.com>
-Date: Tue, 20 Sep 2011 21:32:35 +0900
-Subject: [PATCH 4/8] ARM: NEON: Bilinear macro template for instruction scheduling
-
-This macro template takes 6 code blocks.
-
-1. process_last_pixel
-2. process_two_pixels
-3. process_four_pixels
-4. process_pixblock_head
-5. process_pixblock_tail
-6. process_pixblock_tail_head
-
-process_last_pixel does not need to update the horizontal weight; this
-is done by the template. The two- and four-pixel code blocks should
-update the horizontal weight themselves. The head/tail/tail_head blocks
-make up the unrolled core loop. You can apply instruction scheduling
-to the tail_head blocks.
-
-You can also specify the size of the pixel block. Supported sizes are 4
-and 8. If you want to use a mask, pass the BILINEAR_FLAG_USE_MASK flag
-to the template; you can then use the MASK register. When using the d8~d15
-registers, pass BILINEAR_FLAG_USE_ALL_NEON_REGS to make sure the
-registers are properly saved on the stack and later restored.
----
- pixman/pixman-arm-neon-asm-bilinear.S | 195 +++++++++++++++++++++++++++++++++
- 1 files changed, 195 insertions(+), 0 deletions(-)
-
-diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S
-index c5ba929..784e5df 100644
---- a/pixman/pixman-arm-neon-asm-bilinear.S
-+++ b/pixman/pixman-arm-neon-asm-bilinear.S
-@@ -773,3 +773,198 @@ generate_bilinear_scanline_func_src_a8_dst \
- generate_bilinear_scanline_func_src_a8_dst \
- pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_neon, \
- 8888, 8888, add, 2, 28
-+
-+.set BILINEAR_FLAG_USE_MASK, 1 /* flags bit: generated function has a MASK register (r1) */
-+.set BILINEAR_FLAG_USE_ALL_NEON_REGS, 2 /* flags bit: save/restore d8-d15 around the body */
-+
-+/*
-+ * Main template macro for generating NEON optimized bilinear scanline functions.
-+ *
-+ * The bilinear scanline generator macro takes the following arguments:
-+ * fname - name of the function to generate
-+ * src_fmt - source color format (8888 or 0565)
-+ * dst_fmt - destination color format (8888 or 0565)
-+ * src/dst_bpp_shift - (1 << bpp_shift) is the size of src/dst pixel in bytes
-+ * process_last_pixel - code block that interpolates one pixel and does not
-+ * update horizontal weight
-+ * process_two_pixels - code block that interpolates two pixels and updates
-+ * horizontal weight
-+ * process_four_pixels - code block that interpolates four pixels and updates
-+ * horizontal weight
-+ * process_pixblock_head - head part of middle loop
-+ * process_pixblock_tail - tail part of middle loop
-+ * process_pixblock_tail_head - tail_head of middle loop
-+ * pixblock_size - number of pixels processed in a single middle loop
-+ * (must be 4 or 8, anything else is an assembly error)
-+ * prefetch_distance - prefetch in the source image by that many pixels ahead
-+ * flags - bitwise OR of BILINEAR_FLAG_USE_MASK and
-+ * BILINEAR_FLAG_USE_ALL_NEON_REGS (or 0 for neither)
-+ *
-+ * The code block arguments appear in the macro parameter list with a
-+ * "bilinear_" prefix (e.g. bilinear_process_last_pixel).
-+ *
-+ * Layout of the generated function:
-+ * 1. set up register aliases, push the core registers used by the body and
-+ * load the values that are not already in r0-r3 from the stack
-+ * 2. process 1, 2 and (for pixblock_size == 8) 4 leading pixels, each step
-+ * only when the corresponding address bit of OUT is set and enough
-+ * pixels remain
-+ * 3. run head, then tail_head in a loop, then tail, consuming
-+ * pixblock_size pixels per step
-+ * 4. process the 4 (pixblock_size == 8 only), 2 and 1 trailing pixels
-+ * selected by the low bits of WIDTH
-+ * 5. restore the saved registers and return
-+ */
-+
-+.macro generate_bilinear_scanline_func \
-+ fname, \
-+ src_fmt, dst_fmt, src_bpp_shift, dst_bpp_shift, \
-+ bilinear_process_last_pixel, \
-+ bilinear_process_two_pixels, \
-+ bilinear_process_four_pixels, \
-+ bilinear_process_pixblock_head, \
-+ bilinear_process_pixblock_tail, \
-+ bilinear_process_pixblock_tail_head, \
-+ pixblock_size, \
-+ prefetch_distance, \
-+ flags
-+
-+pixman_asm_function fname
-+.if pixblock_size == 8 /* assembly-time check: only 8 and 4 are accepted */
-+.elseif pixblock_size == 4
-+.else
-+ .error unsupported pixblock size
-+.endif
-+
-+.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0 /* variant without MASK */
-+ OUT .req r0
-+ TOP .req r1
-+ BOTTOM .req r2
-+ WT .req r3
-+ WB .req r4
-+ X .req r5
-+ UX .req r6
-+ WIDTH .req ip
-+ TMP1 .req r3 /* same register as WT */
-+ TMP2 .req r4 /* same register as WB */
-+ PF_OFFS .req r7
-+ TMP3 .req r8
-+ TMP4 .req r9
-+ STRIDE .req r2 /* same register as BOTTOM */
-+
-+ mov ip, sp /* keep the entry sp; the push below moves sp */
-+ push {r4, r5, r6, r7, r8, r9}
-+ mov PF_OFFS, #prefetch_distance
-+ ldmia ip, {WB, X, UX, WIDTH} /* load from memory at the entry sp; WIDTH (ip) replaces the base */
-+.else /* variant with MASK: every alias after OUT moves up by one register */
-+ OUT .req r0
-+ MASK .req r1
-+ TOP .req r2
-+ BOTTOM .req r3
-+ WT .req r4
-+ WB .req r5
-+ X .req r6
-+ UX .req r7
-+ WIDTH .req ip
-+ TMP1 .req r4 /* same register as WT */
-+ TMP2 .req r5 /* same register as WB */
-+ PF_OFFS .req r8
-+ TMP3 .req r9
-+ TMP4 .req r10
-+ STRIDE .req r3 /* same register as BOTTOM */
-+
-+ mov ip, sp /* keep the entry sp; the push below moves sp */
-+ push {r4, r5, r6, r7, r8, r9, r10, ip} /* NOTE(review): ip is pushed too, presumably to keep the register count even - confirm */
-+ mov PF_OFFS, #prefetch_distance
-+ ldmia ip, {WT, WB, X, UX, WIDTH} /* load from memory at the entry sp; WIDTH (ip) replaces the base */
-+.endif
-+
-+ mul PF_OFFS, PF_OFFS, UX /* PF_OFFS = prefetch_distance * UX */
-+
-+.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
-+ vpush {d8-d15} /* saved here, restored by the vpop in the epilogue */
-+.endif
-+
-+ sub STRIDE, BOTTOM, TOP /* STRIDE = BOTTOM - TOP, written over BOTTOM's register */
-+ .unreq BOTTOM
-+
-+ cmp WIDTH, #0
-+ ble 3f /* WIDTH <= 0: nothing to process, go to the epilogue */
-+
-+ vdup.u16 q12, X /* every 16-bit lane of q12 = X */
-+ vdup.u16 q13, UX /* every 16-bit lane of q13 = UX */
-+ vdup.u8 d28, WT /* every byte of d28 = WT */
-+ vdup.u8 d29, WB /* every byte of d29 = WB */
-+ vadd.u16 d25, d25, d26 /* upper half of q12 += UX: q12 = {X x4, (X + UX) x4} */
-+
-+ /* ensure good destination alignment */
-+ cmp WIDTH, #1
-+ blt 0f
-+ tst OUT, #(1 << dst_bpp_shift) /* is the one-pixel address bit of OUT set? */
-+ beq 0f /* bit clear: no single leading pixel */
-+ vshr.u16 q15, q12, #8 /* q15 = q12 >> 8 per lane */
-+ vadd.u16 q12, q12, q13 /* advance q12 by q13 here; the last_pixel block does not do it */
-+ bilinear_process_last_pixel
-+ sub WIDTH, WIDTH, #1
-+0:
-+ vadd.u16 q13, q13, q13 /* double the per-lane step held in q13 */
-+ vshr.u16 q15, q12, #8 /* q15 = q12 >> 8 per lane */
-+ vadd.u16 q12, q12, q13 /* advance q12 by the doubled step */
-+
-+ cmp WIDTH, #2
-+ blt 0f
-+ tst OUT, #(1 << (dst_bpp_shift + 1)) /* is the two-pixel address bit of OUT set? */
-+ beq 0f
-+ bilinear_process_two_pixels
-+ sub WIDTH, WIDTH, #2
-+0:
-+.if pixblock_size == 8 /* the four-pixel leading step exists only for 8-pixel blocks */
-+ cmp WIDTH, #4
-+ blt 0f
-+ tst OUT, #(1 << (dst_bpp_shift + 2)) /* is the four-pixel address bit of OUT set? */
-+ beq 0f
-+ bilinear_process_four_pixels
-+ sub WIDTH, WIDTH, #4
-+0:
-+.endif
-+ subs WIDTH, WIDTH, #pixblock_size
-+ blt 1f /* fewer than pixblock_size pixels left: skip the block loop */
-+ mov PF_OFFS, PF_OFFS, asr #(16 - src_bpp_shift) /* NOTE(review): presumably turns a 16.16 fixed-point pixel offset into bytes - confirm */
-+ bilinear_process_pixblock_head
-+ subs WIDTH, WIDTH, #pixblock_size
-+ blt 5f /* no second full block: finish with the tail */
-+0:
-+ bilinear_process_pixblock_tail_head
-+ subs WIDTH, WIDTH, #pixblock_size
-+ bge 0b /* repeat while another full block remains */
-+5:
-+ bilinear_process_pixblock_tail
-+1: /* WIDTH is negative here; only multiples of pixblock_size were subtracted, so its low bits still select the leftovers */
-+.if pixblock_size == 8
-+ tst WIDTH, #4
-+ beq 2f
-+ bilinear_process_four_pixels
-+2:
-+.endif
-+ /* handle the remaining trailing pixels */
-+ tst WIDTH, #2
-+ beq 2f
-+ bilinear_process_two_pixels
-+2:
-+ tst WIDTH, #1
-+ beq 3f
-+ bilinear_process_last_pixel
-+3: /* epilogue: undo the pushes in reverse order and return */
-+.if ((flags) & BILINEAR_FLAG_USE_ALL_NEON_REGS) != 0
-+ vpop {d8-d15}
-+.endif
-+
-+.if ((flags) & BILINEAR_FLAG_USE_MASK) == 0
-+ pop {r4, r5, r6, r7, r8, r9}
-+.else
-+ pop {r4, r5, r6, r7, r8, r9, r10, ip}
-+.endif
-+ bx lr
-+
-+ .unreq OUT /* release every alias created above (BOTTOM was already released) */
-+ .unreq TOP
-+ .unreq WT
-+ .unreq WB
-+ .unreq X
-+ .unreq UX
-+ .unreq WIDTH
-+ .unreq TMP1
-+ .unreq TMP2
-+ .unreq PF_OFFS
-+ .unreq TMP3
-+ .unreq TMP4
-+ .unreq STRIDE
-+.if ((flags) & BILINEAR_FLAG_USE_MASK) != 0
-+ .unreq MASK /* MASK exists only in the mask variant */
-+.endif
-+
-+.endfunc
-+
-+.endm
---
-1.6.6.1
-