Diffstat (limited to 'media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch')
-rw-r--r--  media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch  293
1 file changed, 293 insertions, 0 deletions
diff --git a/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch b/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch
new file mode 100644
index 0000000..6737811
--- /dev/null
+++ b/media-libs/gst-plugins-base/files/gst-0.10.32-0011-add-some-neon.patch
@@ -0,0 +1,293 @@
+From 537d185b9e9b25f7dacb5e5c4dab47bb8524da34 Mon Sep 17 00:00:00 2001
+From: Rob Clark <rob@ti.com>
+Date: Thu, 8 Apr 2010 00:30:25 -0500
+Subject: [PATCH 11/24] add some neon
+
+---
+ configure.ac           |    1 +
+ gst/stride/Makefile.am |    1 +
+ gst/stride/armv7.s     |  119 ++++++++++++++++++++++++++++++++++++++++++++++++
+ gst/stride/convert.c   |   76 ++++++++++++++++--------
+ 4 files changed, 162 insertions(+), 35 deletions(-)
+ create mode 100644 gst/stride/armv7.s
+
+diff --git a/configure.ac b/configure.ac
+index af6cd52..8e7ba18 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -58,6 +58,7 @@ dnl AS_LIBTOOL_TAGS
+ 
+ AC_LIBTOOL_WIN32_DLL
+ AM_PROG_LIBTOOL
++AM_PROG_AS
+ 
+ dnl *** required versions of GStreamer stuff ***
+ GST_REQ=0.10.32
+diff --git a/gst/stride/Makefile.am b/gst/stride/Makefile.am
+index 0b61d55..3b466de 100644
+--- a/gst/stride/Makefile.am
++++ b/gst/stride/Makefile.am
+@@ -3,6 +3,7 @@ plugin_LTLIBRARIES = libgststridetransform.la
+ libgststridetransform_la_SOURCES = \
+ 	gststridetransform.c \
+ 	convert.c \
++	armv7.s \
+ 	plugin.c
+ 
+ libgststridetransform_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS)
+diff --git a/gst/stride/armv7.s b/gst/stride/armv7.s
+new file mode 100644
+index 0000000..ed636f7
+--- /dev/null
++++ b/gst/stride/armv7.s
+@@ -0,0 +1,119 @@
++@ GStreamer
++@
++@ Copyright (C) 2009 Texas Instruments, Inc - http://www.ti.com/
++@
++@ Description: NEON/VFP accelerated functions for armv7 architecture
++@ Created on: Nov 27, 2009
++@     Author: Rob Clark <rob@ti.com>
++@
++@ This library is free software; you can redistribute it and/or
++@ modify it under the terms of the GNU Library General Public
++@ License as published by the Free Software Foundation; either
++@ version 2 of the License, or (at your option) any later version.
++@
++@ This library is distributed in the hope that it will be useful,
++@ but WITHOUT ANY WARRANTY; without even the implied warranty of
++@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++@ Library General Public License for more details.
++@
++@ You should have received a copy of the GNU Library General Public
++@ License along with this library; if not, write to the
++@ Free Software Foundation, Inc., 59 Temple Place - Suite 330,
++@ Boston, MA 02111-1307, USA.
++
++	.fpu neon
++	.text
++
++	.align
++	.global stride_copy_zip2
++	.type stride_copy_zip2, %function
++@void
++@stride_copy_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint sz)
++@{
++@@@@ note: r0-r3, q0-3, and q8-q15 do not need to be preserved
++stride_copy_zip2:
++@ interleave remaining >= 16 bytes:
++	pld	[r1, #64]
++	pld	[r2, #64]
++	cmp	r3, #16
++	blt	stride_copy_zip2_2
++stride_copy_zip2_1:
++	vld1.8	{q8}, [r1]!
++	vld1.8	{q9}, [r2]!
++
++	vzip.8	q8, q9
++
++	pld	[r1, #64]
++	vst1.8	{q8,q9}, [r0]!
++	pld	[r2, #64]
++	sub	r3, r3, #16
++
++	cmp	r3, #16
++	bge	stride_copy_zip2_1
++@ interleave remaining >= 8 bytes:
++stride_copy_zip2_2:
++	cmp	r3, #8
++	blt	stride_copy_zip2_3
++
++	vld1.8	{d16}, [r1]!
++	vld1.8	{d17}, [r2]!
++
++	vzip.8	d16, d17
++
++	vst1.8	{d16,d17}, [r0]!
++	sub	r3, r3, #8
++
++@ interleave remaining < 8 bytes:
++stride_copy_zip2_3:
++@XXX
++	bx	lr
++@}
++
++	.align
++	.global stride_copy
++	.type stride_copy, %function
++@void
++@stride_copy (guchar *new_buf, guchar *orig_buf, gint sz)
++@{
++@@@@ note: r0-r3, q0-3, and q8-q15 do not need to be preserved
++stride_copy:
++@ copy remaining >= 64 bytes:
++	pld	[r1, #64]
++	cmp	r2, #64
++	blt	stride_copy_2
++stride_copy_1:
++	vld1.8	{q8-q9},  [r1]!
++	sub	r2, r2, #64
++	vld1.8	{q10-q11},[r1]!
++	vst1.8	{q8-q9},  [r0]!
++	pld	[r1, #64]
++	cmp	r2, #64
++	vst1.8	{q10-q11},[r0]!
++	bge	stride_copy_1
++@ copy remaining >= 32 bytes:
++stride_copy_2:
++	cmp	r2, #32
++	blt	stride_copy_3
++	vld1.8	{q8-q9},  [r1]!
++	sub	r2, r2, #32
++	vst1.8	{q8-q9},  [r0]!
++@ copy remaining >= 16 bytes:
++stride_copy_3:
++	cmp	r2, #16
++	blt	stride_copy_4
++	vld1.8	{q8}, [r1]!
++	sub	r2, r2, #16
++	vst1.8	{q8}, [r0]!
++@ copy remaining >= 8 bytes:
++stride_copy_4:
++	cmp	r2, #8
++	blt	stride_copy_5
++	vld1.8	{d16}, [r1]!
++	sub	r2, r2, #8
++	vst1.8	{d16}, [r0]!
++@ copy remaining < 8 bytes:
++stride_copy_5:
++@XXX
++	bx	lr
++@}
++
+diff --git a/gst/stride/convert.c b/gst/stride/convert.c
+index 860f16c..a15063b 100644
+--- a/gst/stride/convert.c
++++ b/gst/stride/convert.c
+@@ -37,38 +37,43 @@ GST_DEBUG_CATEGORY_EXTERN (stridetransform_debug);
+ #define GST_CAT_DEFAULT stridetransform_debug
+ 
+ 
++/* note: some parts of code support in-place transform.. some do not.. I'm
++ * not sure if zip/interleave functions could really support in-place copy..
++ * I need to think about this after having some sleep ;-)
++ */
++
++#define WEAK __attribute__((weak))
++
+ /*
+  * Conversion utilities:
+  */
+ 
+-static void
+-memmove_demux (guchar *new_buf, guchar *orig_buf, gint sz, gint pxstride)
++WEAK void
++stride_copy_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint sz)
+ {
+-  if (new_buf > orig_buf) {
+-    /* copy backwards */
+-    new_buf += ((sz - 1) * pxstride);
+-    orig_buf += sz - 1;
+-    while(sz--) {
+-      *new_buf = *orig_buf;
+-      new_buf -= pxstride;
+-      orig_buf--;
+-    }
+-  } else {
+-    while(sz--) {
+-      *new_buf = *orig_buf;
+-      new_buf += pxstride;
+-      orig_buf++;
+-    }
++  while (sz--) {
++    *new_buf++ = *orig_buf1++;
++    *new_buf++ = *orig_buf2++;
+   }
+ }
+ 
++WEAK void
++stride_copy (guchar *new_buf, guchar *orig_buf, gint sz)
++{
++  memcpy (new_buf, orig_buf, sz);
++}
++
++
++/**
++ * move to strided buffer, interleaving two planes of identical dimensions
++ */
+ static void
+-stridemove_demux (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_width, gint height, gint pxstride)
++stridemove_zip2 (guchar *new_buf, guchar *orig_buf1, guchar *orig_buf2, gint new_width, gint orig_width, gint height)
+ {
+   int row;
+ 
+-  GST_DEBUG ("new_buf=%p, orig_buf=%p, new_width=%d, orig_width=%d, height=%d",
+-      new_buf, orig_buf, new_width, orig_width, height);
++  GST_DEBUG ("new_buf=%p, orig_buf1=%p, orig_buf2=%p, new_width=%d, orig_width=%d, height=%d",
++      new_buf, orig_buf1, orig_buf2, new_width, orig_width, height);
+ 
+   /* if increasing the stride, work from bottom-up to avoid overwriting data
+    * that has not been moved yet.. otherwise, work in the opposite order,
+@@ -76,11 +81,19 @@ stridemove_demux (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_w
+    */
+   if (new_width > orig_width) {
+     for (row=height-1; row>=0; row--) {
+-      memmove_demux (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width, pxstride);
++      stride_copy_zip2 (
++          new_buf+(new_width*row),
++          orig_buf1+(orig_width*row),
++          orig_buf2+(orig_width*row),
++          orig_width);
+     }
+   } else {
+     for (row=0; row<height; row++) {
+-      memmove_demux (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width, pxstride);
++      stride_copy_zip2 (
++          new_buf+(new_width*row),
++          orig_buf1+(orig_width*row),
++          orig_buf2+(orig_width*row),
++          new_width);
+     }
+   }
+ }
+@@ -106,11 +119,11 @@ stridemove (guchar *new_buf, guchar *orig_buf, gint new_width, gint orig_width,
+    */
+   if (new_width > orig_width) {
+     for (row=height-1; row>=0; row--) {
+-      memmove (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width);
++      stride_copy (new_buf+(new_width*row), orig_buf+(orig_width*row), orig_width);
+     }
+   } else {
+     for (row=0; row<height; row++) {
+-      memmove (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width);
++      stride_copy (new_buf+(new_width*row), orig_buf+(orig_width*row), new_width);
+     }
+   }
+ }
+@@ -234,19 +247,12 @@ stridify_i420_nv12 (GstStrideTransform *self, guchar *strided, guchar *unstrided
+ 
+   g_return_val_if_fail (stride >= width, GST_FLOW_ERROR);
+ 
+-  /* note: if not an in-place conversion, then doing the U&V in one pass
+-   * would be more efficient... but if it is an in-place conversion, I'd
+-   * need to think about whether it is potential for the new UV plane to
+-   * corrupt the V plane before it is done copying..
+-   */
+-  stridemove_demux (
+-      strided + (height*stride) + 1,
+-      unstrided + (int)(height*width*1.25),
+-      stride, width/2, height/2, 2); /* move V */
+-  stridemove_demux (
++  /* XXX widths/heights/strides that are not multiple of four??: */
++  stridemove_zip2 (
+       strided + (height*stride),
+       unstrided + (height*width),
+-      stride, width/2, height/2, 2); /* move U */
++      unstrided + (int)(height*width*1.25),
++      stride, width/2, height/2); /* interleave U&V */
+   stridemove (strided, unstrided, stride, width, height); /* move Y */
+ 
+   return GST_FLOW_OK;
+-- 
+1.7.1
+
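For context: the patch keeps portable C fallbacks for stride_copy and stride_copy_zip2 in convert.c, marks them WEAK (__attribute__((weak))), and lets the stronger symbols in armv7.s override them at link time on ARM builds. A minimal sketch of the same zip2 interleave written with NEON intrinsics instead of hand-written assembly is shown below; it is illustrative only, not part of the patch, and the name stride_copy_zip2_intrinsics is hypothetical:

#include <arm_neon.h>

/* Interleave sz bytes from each of two source planes into 2*sz bytes of
 * output, mirroring the q8/q9 loop in armv7.s (illustrative sketch). */
void
stride_copy_zip2_intrinsics (unsigned char *new_buf,
    const unsigned char *orig_buf1, const unsigned char *orig_buf2, int sz)
{
  /* 16 bytes per source per iteration: */
  while (sz >= 16) {
    uint8x16_t a = vld1q_u8 (orig_buf1);    /* vld1.8 {q8}, [r1]!    */
    uint8x16_t b = vld1q_u8 (orig_buf2);    /* vld1.8 {q9}, [r2]!    */
    uint8x16x2_t z = vzipq_u8 (a, b);       /* vzip.8 q8, q9         */
    vst1q_u8 (new_buf, z.val[0]);           /* vst1.8 {q8,q9}, [r0]! */
    vst1q_u8 (new_buf + 16, z.val[1]);
    orig_buf1 += 16;
    orig_buf2 += 16;
    new_buf += 32;
    sz -= 16;
  }
  /* scalar tail for the < 16 byte remainder (armv7.s leaves the < 8 byte
   * case as an XXX; the weak C fallback in convert.c covers it) */
  while (sz-- > 0) {
    *new_buf++ = *orig_buf1++;
    *new_buf++ = *orig_buf2++;
  }
}

The byte-pair interleave is what stridify_i420_nv12 relies on: it zips the separate U and V planes of I420 into the single interleaved UV plane that NV12 expects, while stride_copy handles the Y plane as a plain row-by-row copy.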