@@ -29,6 +29,100 @@
static const uint32_t pixel_mask[5] = { 0xffffffff, 0x01ff01ff, 0x03ff03ff, 0x0fff0fff, 0x3fff3fff };
static const uint32_t pixel_mask_lf[3] = { 0xff0fff0f, 0x01ff000f, 0x03ff000f };
+static const int depths[5] = { 8, 9, 10, 12, 14 };
+
+/*
+ * Compare the H.264 weight and biweight functions under test against the
+ * reference ones for every bit depth in depths[], every entry of the two
+ * function tables and every block height in heights[].
+ */
+static void check_weight(void)
+{
+    static const int heights[] = { 2, 4, 8, 16 };
+#define MAXHEIGHT 16
+    for (int d = 0; d < FF_ARRAY_ELEMS(depths); d++) {
+        const int bit_depth = depths[d];
+        /* 8-bit data is one sample per byte, so randomise both bytes of each
+         * 16-bit element; deeper formats keep bit_depth bits per element. */
+        const int sample_mask = bit_depth == 8 ? 0xffff
+                                               : 0xffff >> (16 - bit_depth);
+        const int offset = 128 - (rnd() & 0xff);
+        const int log_denom = rnd() % 7;
+        /* Both weights are masked to log_denom bits. */
+        const int wa = rnd() & ((1 << log_denom) - 1);
+        const int wb = rnd() & ((1 << log_denom) - 1);
+        /* NOTE(review): plain stack arrays carry no explicit alignment;
+         * confirm that no tested implementation needs more than that. */
+        uint16_t ref[MAXHEIGHT * 128 * 2], src[MAXHEIGHT * 128 * 2];
+        uint16_t out0[MAXHEIGHT * 128 * 2], out1[MAXHEIGHT * 128 * 2];
+        uint8_t *const psrc = (void *)src;
+        uint8_t *const pout0 = (void *)out0;
+        uint8_t *const pout1 = (void *)out1;
+        H264DSPContext h;
+
+        ff_h264dsp_init(&h, bit_depth, 1);
+
+        for (size_t i = 0; i < FF_ARRAY_ELEMS(ref); i++) {
+            ref[i] = rnd() & sample_mask;
+            src[i] = rnd() & sample_mask;
+        }
+
+        /* Weight functions: operate on a single buffer. */
+        for (int w = 0; w < 4; w++) {
+            declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *block,
+                              ptrdiff_t stride, int height, int log2_denom,
+                              int weight, int offset);
+
+            if (check_func(h.weight_h264_pixels_tab[w], "h264_weight%d_%d",
+                           16 >> w, bit_depth)) {
+                for (int i = 0; i < FF_ARRAY_ELEMS(heights); i++) {
+                    const int height = heights[i];
+
+                    /* Start both runs from identical input. */
+                    memcpy(out0, ref, sizeof(out0));
+                    memcpy(out1, ref, sizeof(out1));
+
+                    call_ref(pout0, 32 >> w, height, log_denom, wa, offset);
+                    call_new(pout1, 32 >> w, height, log_denom, wa, offset);
+
+                    if (memcmp(out0, out1, sizeof(out0)))
+                        fail();
+                }
+
+                bench_new(pout1, 32 >> w, MAXHEIGHT, log_denom, wa, offset);
+            }
+        }
+
+        /* Biweight functions: take psrc as an additional input buffer. */
+        for (int w = 0; w < 4; w++) {
+            declare_func_emms(AV_CPU_FLAG_MMX, void, uint8_t *dst,
+                              const uint8_t *src, ptrdiff_t stride, int height,
+                              int log2_denom, int wd, int ws, int offset);
+
+            if (check_func(h.biweight_h264_pixels_tab[w], "h264_biweight%d_%d",
+                           16 >> w, bit_depth)) {
+                for (int i = 0; i < FF_ARRAY_ELEMS(heights); i++) {
+                    const int height = heights[i];
+
+                    memcpy(out0, ref, sizeof(out0));
+                    memcpy(out1, ref, sizeof(out1));
+
+                    call_ref(pout0, psrc, 32 >> w, height, log_denom, wa, wb,
+                             offset);
+                    call_new(pout1, psrc, 32 >> w, height, log_denom, wa, wb,
+                             offset);
+
+                    if (memcmp(out0, out1, sizeof(out0)))
+                        fail();
+                }
+
+                bench_new(pout1, psrc, 32 >> w, MAXHEIGHT, log_denom, wa, wb,
+                          offset);
+            }
+        }
+    }
+#undef MAXHEIGHT
+}
#define SIZEOF_PIXEL ((bit_depth + 7) / 8)
#define SIZEOF_COEF (2 * ((bit_depth + 7) / 8))
@@ -173,7 +257,6 @@ static void dct8x8(int16_t *coef, int bit_depth)
static void check_idct(void)
{
- static const int depths[5] = { 8, 9, 10, 12, 14 };
LOCAL_ALIGNED_16(uint8_t, src, [8 * 8 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst, [8 * 8 * 2]);
LOCAL_ALIGNED_16(uint8_t, dst0, [8 * 8 * 2]);
@@ -451,6 +534,9 @@ static void check_loop_filter_intra(void)
void checkasm_check_h264dsp(void)
{
+ check_weight();
+ report("weight");
+
check_idct();
check_idct_multiple();
report("idct");