Diffstat (limited to 'media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch')
-rw-r--r--  media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch  293
1 file changed, 293 insertions, 0 deletions
diff --git a/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch b/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch
new file mode 100644
index 0000000..6737811
--- /dev/null
+++ b/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch
@@ -0,0 +1,293 @@
+From 537d185b9e9b25f7dacb5e5c4dab47bb8524da34 Mon Sep 17 00:00:00 2001
+From: Rob Clark <rob@ti.com>
+Date: Thu, 8 Apr 2010 00:30:25 -0500
+Subject: [PATCH 11/24] add some neon
+
+---
+ configure.ac           |    1 +
+ gst/stride/Makefile.am |    1 +
+ gst/stride/armv7.s     |  119 ++++++++++++++++++++++++++++++++++++++++++++++++
+ gst/stride/convert.c   |   76 ++++++++++++++++--------
+ 4 files changed, 162 insertions(+), 35 deletions(-)
+ create mode 100644 gst/stride/armv7.s
+
+diff --git a/configure.ac b/configure.ac
+index af6cd52..8e7ba18 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -58,6 +58,7 @@ dnl AS_LIBTOOL_TAGS
+ 
+ AC_LIBTOOL_WIN32_DLL
+ AM_PROG_LIBTOOL
++AM_PROG_AS
+ 
+ dnl *** required versions of GStreamer stuff ***
+ GST_REQ=0.10.32
+diff --git a/gst/stride/Makefile.am b/gst/stride/Makefile.am
+index 0b61d55..3b466de 100644
+--- a/gst/stride/Makefile.am
++++ b/gst/stride/Makefile.am
+@@ -3,6 +3,7 @@ plugin_LTLIBRARIES = libgststridetransform.la
+ libgststridetransform_la_SOURCES = \
+ 	gststridetransform.c \
+ 	convert.c \
++	armv7.s \
+ 	plugin.c
+ 
+ libgststridetransform_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS)
+diff --git a/gst/stride/armv7.s b/gst/stride/armv7.s
+new file mode 100644
+index 0000000..ed636f7
+--- /dev/null
++++ b/gst/stride/armv7.s
+@@ -0,0 +1,119 @@
++@ GStreamer
++@
++@ Copyright (C) 2009 Texas Instruments, Inc - http://www.ti.com/
++@
++@ Description: NEON/VFP accelerated functions for armv7 architecture
++@ Created on: Nov 27, 2009
++@     Author: Rob Clark <rob@ti.com>
++@
++@ This library is free software; you can redistribute it and/or
++@ modify it under the terms of the GNU Library General Public
++@ License as published by the Free Software Foundation; either
++@ version 2 of the License, or (at your option) any later version.
++@
++@ This library is distributed in the hope that it will be useful,
++@ but WITHOUT ANY WARRANTY; without even the implied warranty of
++@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++@ Library General Public License for more details.
++@
++@ You should have received a copy of the GNU Library General Public
++@ License along with this library; if not, write to the
++@ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
++@ Boston, MA 02111-1307, USA.
++
++	.fpu neon
++	.text
++
++	.align
++	.global stride_copy_zip2
++	.type stride_copy_zip2, %function
++@void
++@stride_copy_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint sz)
++@{
++@@@@ note: r0-r3, q0-3, and q8-q15 do not need to be preserved
++stride_copy_zip2:
++@ interleave remaining >= 16 bytes:
++	pld	[r1, #64]
++	pld	[r2, #64]
++	cmp	r3, #16
++	blt	stride_copy_zip2_2
++stride_copy_zip2_1:
++	vld1.8	{q8}, [r1]!
++	vld1.8	{q9}, [r2]!
++
++	vzip.8	q8, q9
++
++	pld	[r1, #64]
++	vst1.8	{q8,q9}, [r0]!
++	pld	[r2, #64]
++	sub	r3, r3, #16
++
++	cmp	r3, #16
++	bge	stride_copy_zip2_1
++@ interleave remaining >= 8 bytes:
++stride_copy_zip2_2:
++	cmp	r3, #8
++	blt	stride_copy_zip2_3
++
++	vld1.8	{d16}, [r1]!
++	vld1.8	{d17}, [r2]!
++
++	vzip.8	d16, d17
++
++	vst1.8	{d16,d17}, [r0]!
++	sub	r3, r3, #8
++
++@ interleave remaining < 8 bytes:
++stride_copy_zip2_3:
++@XXX
++	bx	lr
++@}
++
++	.align
++	.global stride_copy
++	.type stride_copy, %function
++@void
++@stride_copy (guchar *new_buf, guchar *orig_buf, gint sz)
++@{
++@@@@ note: r0-r3, q0-3, and q8-q15 do not need to be preserved
++stride_copy:
++@ copy remaining >= 64 bytes:
++	pld	[r1, #64]
++	cmp	r2, #64
++	blt	stride_copy_2
++stride_copy_1:
++	vld1.8	{q8-q9},  [r1]!
++	sub	r2, r2, #64
++	vld1.8	{q10-q11},[r1]!
++	vst1.8	{q8-q9},  [r0]!
++	pld	[r1, #64]
++	cmp	r2, #64
++	vst1.8	{q10-q11},[r0]!
++	bge	stride_copy_1
++@ copy remaining >= 32 bytes:
++stride_copy_2:
++	cmp	r2, #32
++	blt	stride_copy_3
++	vld1.8	{q8-q9},  [r1]!
++	sub	r2, r2, #32
++	vst1.8	{q8-q9},  [r0]!
++@ copy remaining >= 16 bytes:
++stride_copy_3:
++	cmp	r2, #16
++	blt	stride_copy_4
++	vld1.8	{q8}, [r1]!
++	sub	r2, r2, #16
++	vst1.8	{q8}, [r0]!
++@ copy remaining >= 8 bytes:
++stride_copy_4:
++	cmp	r2, #8
++	blt	stride_copy_5
++	vld1.8	{d16}, [r1]!
++	sub	r2, r2, #8
++	vst1.8	{d16}, [r0]!
++@ copy remaining < 8 bytes:
++stride_copy_5:
++@XXX
++	bx	lr
++@}
++
+diff --git a/gst/stride/convert.c b/gst/stride/convert.c
+index 860f16c..a15063b 100644
+--- a/gst/stride/convert.c
++++ b/gst/stride/convert.c
+@@ -37,38 +37,43 @@ GST_DEBUG_CATEGORY_EXTERN (stridetransform_debug);
+ #define GST_CAT_DEFAULT stridetransform_debug
+ 
+ 
++/* note: some parts of code support in-place transform.. some do not.. I'm
++ * not sure if zip/interleave functions could really support in-place copy..
++ * I need to think about this after having some sleep ;-)
++ */
++
++#define WEAK __attribute__((weak))
++
+ /*
+  * Conversion utilities:
+  */
+ 
+-static void
+-memmove_demux (guchar *new_buf, guchar *orig_buf, gint sz, gint pxstride)
++WEAK void
++stride_copy_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint sz)
+ {
+-  if (new_buf > orig_buf) {
+-    /* copy backwards */
+-    new_buf += ((sz - 1) * pxstride);
+-    orig_buf += sz - 1;
+-    while(sz--) {
+-      *new_buf = *orig_buf;
+-      new_buf -= pxstride;
+-      orig_buf--;
+-    }
+-  } else {
+-    while(sz--) {
+-      *new_buf = *orig_buf;
+-      new_buf += pxstride;
+-      orig_buf++;
+-    }
++  while (sz--) {
++    *new_buf++ = *orig_buf1++;
++    *new_buf++ = *orig_buf2++;
+   }
+ }
+ 
++WEAK void
++stride_copy (guchar *new_buf, guchar *orig_buf, gint sz)
++{
++  memcpy (new_buf, orig_buf, sz);
++}
++
++
++/**
++ * move to strided buffer, interleaving two planes of identical dimensions
++ */
+ static void
+-stridemove_demux (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_width, gint height, gint pxstride)
++stridemove_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint new_width, gint orig_width, gint height)
+ {
+   int row;
+ 
+-  GST_DEBUG ("new_buf=%p, orig_buf=%p, new_width=%d, orig_width=%d, height=%d",
+-      new_buf, orig_buf, new_width, orig_width, height);
++  GST_DEBUG ("new_buf=%p, orig_buf1=%p, orig_buf2=%p, new_width=%d, orig_width=%d, height=%d",
++      new_buf, orig_buf1, orig_buf2, new_width, orig_width, height);
+ 
+   /* if increasing the stride, work from bottom-up to avoid overwriting data
+    * that has not been moved yet.. otherwise, work in the opposite order,
+@@ -76,11 +81,19 @@ stridemove_demux (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_w
+    */
+   if (new_width > orig_width) {
+     for (row=height-1; row>=0; row--) {
+-      memmove_demux (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width, pxstride);
++      stride_copy_zip2 (
++          new_buf+(new_width*row),
++          orig_buf1+(orig_width*row),
++          orig_buf2+(orig_width*row),
++          orig_width);
+     }
+   } else {
+     for (row=0; row<height; row++) {
+-      memmove_demux (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width, pxstride);
++      stride_copy_zip2 (
++          new_buf+(new_width*row),
++          orig_buf1+(orig_width*row),
++          orig_buf2+(orig_width*row),
++          new_width);
+     }
+   }
+ }
+@@ -106,11 +119,11 @@ stridemove (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_width,
+    */
+   if (new_width > orig_width) {
+     for (row=height-1; row>=0; row--) {
+-      memmove (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width);
++      stride_copy (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width);
+     }
+   } else {
+     for (row=0; row<height; row++) {
+-      memmove (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width);
++      stride_copy (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width);
+     }
+   }
+ }
+@@ -234,19 +247,12 @@ stridify_i420_nv12 (GstStrideTransform *self, guchar *strided, guchar *unstrided
+ 
+   g_return_val_if_fail (stride >= width, GST_FLOW_ERROR);
+ 
+-  /* note: if not an in-place conversion, then doing the U&V in one pass
+-   * would be more efficient... but if it is an in-place conversion, I'd
+-   * need to think about whether it is potential for the new UV plane to
+-   * corrupt the V plane before it is done copying..
+-   */
+-  stridemove_demux (
+-      strided + (height*stride) + 1,
+-      unstrided + (int)(height*width*1.25),
+-      stride, width/2, height/2, 2); /* move V */
+-  stridemove_demux (
++  /* XXX widths/heights/strides that are not multiple of four??: */
++  stridemove_zip2 (
+       strided + (height*stride),
+       unstrided + (height*width),
+-      stride, width/2, height/2, 2); /* move U */
++      unstrided + (int)(height*width*1.25),
++      stride, width/2, height/2); /* interleave U&V */
+   stridemove (strided, unstrided, stride, width, height); /* move Y */
+ 
+   return GST_FLOW_OK;
+-- 
+1.7.1
+
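For context: the patch keeps portable C fallbacks for stride_copy and stride_copy_zip2 in convert.c, marks them WEAK (__attribute__((weak))), and lets the stronger symbols in armv7.s override them at link time on ARM builds. A minimal sketch of the same zip2 interleave written with NEON intrinsics instead of hand-written assembly is shown below; it is illustrative only, not part of the patch, and the name stride_copy_zip2_intrinsics is hypothetical:

#include <arm_neon.h>

/* Interleave sz bytes from each of two source planes into 2*sz bytes of
 * output, mirroring the q8/q9 loop in armv7.s (illustrative sketch). */
void
stride_copy_zip2_intrinsics (unsigned char *new_buf,
    const unsigned char *orig_buf1, const unsigned char *orig_buf2, int sz)
{
  /* 16 bytes per source per iteration: */
  while (sz >= 16) {
    uint8x16_t a = vld1q_u8 (orig_buf1);    /* vld1.8 {q8}, [r1]!    */
    uint8x16_t b = vld1q_u8 (orig_buf2);    /* vld1.8 {q9}, [r2]!    */
    uint8x16x2_t z = vzipq_u8 (a, b);       /* vzip.8 q8, q9         */
    vst1q_u8 (new_buf, z.val[0]);           /* vst1.8 {q8,q9}, [r0]! */
    vst1q_u8 (new_buf + 16, z.val[1]);
    orig_buf1 += 16;
    orig_buf2 += 16;
    new_buf += 32;
    sz -= 16;
  }
  /* scalar tail for the < 16 byte remainder (armv7.s leaves the < 8 byte
   * case as an XXX; the weak C fallback in convert.c covers it) */
  while (sz-- > 0) {
    *new_buf++ = *orig_buf1++;
    *new_buf++ = *orig_buf2++;
  }
}

The byte-pair interleave is what stridify_i420_nv12 relies on: it zips the separate U and V planes of I420 into the single interleaved UV plane that NV12 expects, while stride_copy handles the Y plane as a plain row-by-row copy.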