@@ -27,31 +27,32 @@
#include "startcode.h"
#include "config.h"
+#include "libavutil/intreadwrite.h"
int ff_startcode_find_candidate_c(const uint8_t *buf, int size)
{
- int i = 0;
+ const uint8_t *start = buf, *end = buf + size;
+
#if HAVE_FAST_UNALIGNED
- /* we check i < size instead of i + 3 / 7 because it is
- * simpler and there must be AV_INPUT_BUFFER_PADDING_SIZE
- * bytes at the end.
- */
+#define READ(bitness) AV_RN ## bitness
+#define MAIN_LOOP(bitness, mask1, mask2) do { \
+ /* Stop at the first word that contains a zero byte. We check buf < end
+ * instead of buf + 3 / 7 because it is simpler and there must be
+ * AV_INPUT_BUFFER_PADDING_SIZE bytes at the end. */ \
+ for (; buf < end; buf += bitness / 8) \
+ if ((~READ(bitness)(buf) & (READ(bitness)(buf) - mask1)) \
+ & mask2) \
+ break; \
+ } while (0)
+
#if HAVE_FAST_64BIT
- while (i < size &&
- !((~*(const uint64_t *)(buf + i) &
- (*(const uint64_t *)(buf + i) - 0x0101010101010101ULL)) &
- 0x8080808080808080ULL))
- i += 8;
+ MAIN_LOOP(64, 0x0101010101010101ULL, 0x8080808080808080ULL);
#else
- while (i < size &&
- !((~*(const uint32_t *)(buf + i) &
- (*(const uint32_t *)(buf + i) - 0x01010101U)) &
- 0x80808080U))
- i += 4;
+ MAIN_LOOP(32, 0x01010101U, 0x80808080U);
#endif
#endif
- for (; i < size; i++)
- if (!buf[i])
+ for (; buf < end; buf++)
+ if (!*buf)
break;
- return i;
+ return buf - start;
}
The reasons are cosmetics and preparation for future patches that will have even more cases and whose performance improves when switching to direct pointer arithmetic: Benchmarks have shown that using pointers directly instead of indexing to access the array is about 5% faster (33665 vs. 31806 for a 7.4 Mb/s H.264 file based on 10 iterations of 131072 runs each; and 244356 vs. 233373 for a 30.2 Mb/s H.264 file based on 10 iterations with 8192 runs each). Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com> --- libavcodec/startcode.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-)